1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
54 #include "tm-constrs.h"
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
65 /* Return index of given mode in mult and division cost tables. */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* NOTE(review): initializer order must match struct processor_costs
   (declared in a header not visible in this chunk) -- TODO confirm.
   All values below use COSTS_N_BYTES: when optimizing for size, "cost"
   means encoded instruction bytes, not cycles.  */
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
/* Stringop tables: memcpy strategy, then memset strategy
   (32-bit variant first, then 64-bit).  rep_prefix_1_byte is the
   smallest encoding, consistent with size tuning.  */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
149 /* Processor costs (relative to an add) */
/* NOTE(review): 386-tuned cycle costs; field order must track
   struct processor_costs -- TODO confirm against the header.  */
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
/* memcpy and memset stringop strategies; DUMMY_STRINGOP_ALGS fills the
   (unused on 386) 64-bit slot.  */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): 486-tuned cycle costs; same field order as the other
   processor_costs tables in this file.  */
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
/* memcpy/memset strategies: 4-byte rep prefix on 486.  */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): Pentium (P5) tuned cycle costs.  */
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
/* memcpy: rep movsl up to 256 bytes, then library call;
   memset: rep stosl for all sizes.  */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): PentiumPro/P6 tuned cycle costs.  */
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
   (NOTE(review): closing of this comment was lost in extraction.)  */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): AMD Geode tuned cycle costs.  */
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl/stosl up to 256 bytes, library call beyond.  */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): AMD K6 tuned cycle costs.  */
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* memcpy/memset: rep movsl/stosl up to 256 bytes, library call beyond.  */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): AMD Athlon tuned cycle costs.  */
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 128 bytes for memset. */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): AMD K8 (Opteron/Athlon64) tuned cycle costs.  */
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
701 /* New AMD processors never drop prefetches; if they cannot be performed
702 immediately, they are queued. We set number of simultaneous prefetches
703 to a large constant to reflect this (it probably is not a good idea not
704 to limit number of prefetches at all, as their execution also takes some
   time -- NOTE(review): tail of this comment lost in extraction).  */
706 100, /* number of parallel prefetches */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 blocks it is better to use loop. For large blocks, libcall can do
716 nontemporary accesses and beat inline considerably. */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
/* NOTE(review): AMD Family 10h (Barcelona) tuned cycle costs.  */
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
/* NOTE(review): the following latency-table lines belonged to a block
   comment whose opener was lost in extraction; re-wrapped as a comment.
777 MOVD reg64, xmmreg Double FSTORE 4
778 MOVD reg32, xmmreg Double FSTORE 4
780 MOVD reg64, xmmreg Double FADD 3
782 MOVD reg32, xmmreg Double FADD 3
   */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
787 /* New AMD processors never drop prefetches; if they cannot be performed
788 immediately, they are queued. We set number of simultaneous prefetches
789 to a large constant to reflect this (it probably is not a good idea not
790 to limit number of prefetches at all, as their execution also takes some
   time -- NOTE(review): tail of this comment lost in extraction).  */
792 100, /* number of parallel prefetches */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 very small blocks it is better to use loop. For large blocks, libcall can
803 do nontemporary accesses and beat inline considerably. */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
/* NOTE(review): Pentium 4 (NetBurst) tuned cycle costs.  */
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
/* NOTE(review): the memset table below looks truncated in this
   extraction (no terminating {-1, ...} entry before
   DUMMY_STRINGOP_ALGS) -- verify against upstream sources.  */
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
/* Nocona (64-bit-capable NetBurst Xeon) instruction cost table, costs
   relative to an add.  NOTE(review): embedded numbering has gaps
   (912->914, 938->940, 949->951, and no closing brace line visible) --
   lines such as MOVE_RATIO and branch cost appear dropped by the
   extraction; confirm field order before editing.  */
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
/* Core 2 instruction cost table, costs relative to an add.  The lea
   cost is COSTS_N_INSNS (1) + 1 to bias synth_mult away from lea
   chains.  NOTE(review): numbering gap 985->987 and no closing brace
   line -- a MOVE_RATIO entry appears dropped by the extraction;
   confirm field order before editing.  */
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
/* Intel Atom instruction cost table, costs relative to an add.  This
   table includes an explicit MOVE_RATIO (17).  NOTE(review): no
   closing brace line is visible before the next table -- presumably
   dropped by the extraction.  */
1040 struct processor_costs atom_cost = {
1041 COSTS_N_INSNS (1), /* cost of an add instruction */
1042 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1043 COSTS_N_INSNS (1), /* variable shift costs */
1044 COSTS_N_INSNS (1), /* constant shift costs */
1045 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1046 COSTS_N_INSNS (4), /* HI */
1047 COSTS_N_INSNS (3), /* SI */
1048 COSTS_N_INSNS (4), /* DI */
1049 COSTS_N_INSNS (2)}, /* other */
1050 0, /* cost of multiply per each bit set */
1051 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1052 COSTS_N_INSNS (26), /* HI */
1053 COSTS_N_INSNS (42), /* SI */
1054 COSTS_N_INSNS (74), /* DI */
1055 COSTS_N_INSNS (74)}, /* other */
1056 COSTS_N_INSNS (1), /* cost of movsx */
1057 COSTS_N_INSNS (1), /* cost of movzx */
1058 8, /* "large" insn */
1059 17, /* MOVE_RATIO */
1060 2, /* cost for loading QImode using movzbl */
1061 {4, 4, 4}, /* cost of loading integer registers
1062 in QImode, HImode and SImode.
1063 Relative to reg-reg move (2). */
1064 {4, 4, 4}, /* cost of storing integer registers */
1065 4, /* cost of reg,reg fld/fst */
1066 {12, 12, 12}, /* cost of loading fp registers
1067 in SFmode, DFmode and XFmode */
1068 {6, 6, 8}, /* cost of storing fp registers
1069 in SFmode, DFmode and XFmode */
1070 2, /* cost of moving MMX register */
1071 {8, 8}, /* cost of loading MMX registers
1072 in SImode and DImode */
1073 {8, 8}, /* cost of storing MMX registers
1074 in SImode and DImode */
1075 2, /* cost of moving SSE register */
1076 {8, 8, 8}, /* cost of loading SSE registers
1077 in SImode, DImode and TImode */
1078 {8, 8, 8}, /* cost of storing SSE registers
1079 in SImode, DImode and TImode */
1080 5, /* MMX or SSE register to integer */
1081 32, /* size of l1 cache. */
1082 256, /* size of l2 cache. */
1083 64, /* size of prefetch block */
1084 6, /* number of parallel prefetches */
1085 3, /* Branch cost */
1086 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1092 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1093 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1094 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1095 {{libcall, {{8, loop}, {15, unrolled_loop},
1096 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1097 {libcall, {{24, loop}, {32, unrolled_loop},
1098 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 1, /* scalar_stmt_cost. */
1100 1, /* scalar load_cost. */
1101 1, /* scalar_store_cost. */
1102 1, /* vec_stmt_cost. */
1103 1, /* vec_to_scalar_cost. */
1104 1, /* scalar_to_vec_cost. */
1105 1, /* vec_align_load_cost. */
1106 2, /* vec_unalign_load_cost. */
1107 1, /* vec_store_cost. */
1108 3, /* cond_taken_branch_cost. */
1109 1, /* cond_not_taken_branch_cost. */
/* generic64: blended 64-bit tuning targeting a common subset of Nocona
   and K8 behavior.  Costs relative to an add.  NOTE(review): no closing
   brace line is visible before the next table -- presumably dropped by
   the extraction.  */
1112 /* Generic64 should produce code tuned for Nocona and K8. */
1114 struct processor_costs generic64_cost = {
1115 COSTS_N_INSNS (1), /* cost of an add instruction */
1116 /* On all chips taken into consideration lea is 2 cycles and more. With
1117 this cost however our current implementation of synth_mult results in
1118 use of unnecessary temporary registers causing regression on several
1119 SPECfp benchmarks. */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 512, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1164 is increased to perhaps more appropriate value of 5. */
1165 3, /* Branch cost */
1166 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1167 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1168 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1169 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1170 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1171 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1172 {DUMMY_STRINGOP_ALGS,
1173 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1174 {DUMMY_STRINGOP_ALGS,
1175 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1176 1, /* scalar_stmt_cost. */
1177 1, /* scalar load_cost. */
1178 1, /* scalar_store_cost. */
1179 1, /* vec_stmt_cost. */
1180 1, /* vec_to_scalar_cost. */
1181 1, /* scalar_to_vec_cost. */
1182 1, /* vec_align_load_cost. */
1183 2, /* vec_unalign_load_cost. */
1184 1, /* vec_store_cost. */
1185 3, /* cond_taken_branch_cost. */
1186 1, /* cond_not_taken_branch_cost. */
/* generic32: blended 32-bit tuning; only the 32-bit stringop entries
   are real, the 64-bit halves use DUMMY_STRINGOP_ALGS.  Costs relative
   to an add.  NOTE(review): no closing brace line is visible before
   the following ix86_cost definition -- presumably dropped by the
   extraction.  */
1189 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1191 struct processor_costs generic32_cost = {
1192 COSTS_N_INSNS (1), /* cost of an add instruction */
1193 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1194 COSTS_N_INSNS (1), /* variable shift costs */
1195 COSTS_N_INSNS (1), /* constant shift costs */
1196 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1197 COSTS_N_INSNS (4), /* HI */
1198 COSTS_N_INSNS (3), /* SI */
1199 COSTS_N_INSNS (4), /* DI */
1200 COSTS_N_INSNS (2)}, /* other */
1201 0, /* cost of multiply per each bit set */
1202 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1203 COSTS_N_INSNS (26), /* HI */
1204 COSTS_N_INSNS (42), /* SI */
1205 COSTS_N_INSNS (74), /* DI */
1206 COSTS_N_INSNS (74)}, /* other */
1207 COSTS_N_INSNS (1), /* cost of movsx */
1208 COSTS_N_INSNS (1), /* cost of movzx */
1209 8, /* "large" insn */
1210 17, /* MOVE_RATIO */
1211 4, /* cost for loading QImode using movzbl */
1212 {4, 4, 4}, /* cost of loading integer registers
1213 in QImode, HImode and SImode.
1214 Relative to reg-reg move (2). */
1215 {4, 4, 4}, /* cost of storing integer registers */
1216 4, /* cost of reg,reg fld/fst */
1217 {12, 12, 12}, /* cost of loading fp registers
1218 in SFmode, DFmode and XFmode */
1219 {6, 6, 8}, /* cost of storing fp registers
1220 in SFmode, DFmode and XFmode */
1221 2, /* cost of moving MMX register */
1222 {8, 8}, /* cost of loading MMX registers
1223 in SImode and DImode */
1224 {8, 8}, /* cost of storing MMX registers
1225 in SImode and DImode */
1226 2, /* cost of moving SSE register */
1227 {8, 8, 8}, /* cost of loading SSE registers
1228 in SImode, DImode and TImode */
1229 {8, 8, 8}, /* cost of storing SSE registers
1230 in SImode, DImode and TImode */
1231 5, /* MMX or SSE register to integer */
1232 32, /* size of l1 cache. */
1233 256, /* size of l2 cache. */
1234 64, /* size of prefetch block */
1235 6, /* number of parallel prefetches */
1236 3, /* Branch cost */
1237 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1238 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1239 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1240 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1241 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1242 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1243 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1244 DUMMY_STRINGOP_ALGS},
1245 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1246 DUMMY_STRINGOP_ALGS},
1247 1, /* scalar_stmt_cost. */
1248 1, /* scalar load_cost. */
1249 1, /* scalar_store_cost. */
1250 1, /* vec_stmt_cost. */
1251 1, /* vec_to_scalar_cost. */
1252 1, /* scalar_to_vec_cost. */
1253 1, /* vec_align_load_cost. */
1254 2, /* vec_unalign_load_cost. */
1255 1, /* vec_store_cost. */
1256 3, /* cond_taken_branch_cost. */
1257 1, /* cond_not_taken_branch_cost. */
/* Active cost table; initialized to pentium_cost and presumably
   re-pointed during option processing (the override site is outside
   this chunk -- TODO confirm).  */
1260 const struct processor_costs *ix86_cost = &pentium_cost;
1262 /* Processor feature/optimization bitmasks.  Each m_* macro is a
   one-bit mask keyed by the PROCESSOR_* enum value, OR-ed together to
   select which CPUs a tuning below applies to.  */
1263 #define m_386 (1<<PROCESSOR_I386)
1264 #define m_486 (1<<PROCESSOR_I486)
1265 #define m_PENT (1<<PROCESSOR_PENTIUM)
1266 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1267 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1268 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1269 #define m_CORE2 (1<<PROCESSOR_CORE2)
1270 #define m_ATOM (1<<PROCESSOR_ATOM)
1272 #define m_GEODE (1<<PROCESSOR_GEODE)
1273 #define m_K6 (1<<PROCESSOR_K6)
1274 #define m_K6_GEODE (m_K6 | m_GEODE)
1275 #define m_K8 (1<<PROCESSOR_K8)
1276 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1277 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1278 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1279 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1281 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1282 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1284 /* Generic instruction choice should be common subset of supported CPUs
1285 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1286 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Per-tuning feature masks.  Each entry of initial_ix86_tune_features
   is the OR of m_* masks for CPUs that want the corresponding
   X86_TUNE_* knob enabled; ix86_tune_features is derived from it for
   the selected processor.  NOTE(review): many entries below show a
   comment with no following mask line (e.g. X86_TUNE_ZERO_EXTEND_WITH_AND,
   X86_TUNE_USE_MOV0, X86_TUNE_SHIFT1) -- the "0," or mask lines appear
   to have been dropped by the extraction, not deleted by intent;
   verify entry count against the X86_TUNE_* enum before editing.  */
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
/* Per-architecture feature masks (which ISA features exist on a CPU,
   as opposed to tuning preferences above).  NOTE(review): several
   entries show only the comment with no mask line, and the closing
   brace / trailing mask lines appear dropped by the extraction --
   verify entry count against the X86_ARCH_* enum before editing.  */
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Command-line-selectable string-operation strategy; no_stringop means
   "use the cost tables above" (override site is outside this chunk --
   TODO confirm).  */
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for single function invocation is
1570 lower than this constant, emit fast (but longer) prologue and
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* NOTE(review): numbering gaps (1587->1589, 1596->1599, 1605->1609)
   suggest some rows and the closing brace were dropped by the
   extraction -- verify against FIRST_PSEUDO_REGISTER ordering in
   i386.h before editing.  */
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* GCC regno -> debugger (DBX/stabs) register number, 32-bit mode;
   -1 marks registers with no debug number.  */
1609 /* The "default" register map used in 32bit mode. */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* GCC regno -> debugger register number, 64-bit mode.  Unlike the
   32-bit map, the extended (REX) integer and SSE registers get real
   numbers here.  */
1622 /* The "default" register map used in 64bit mode. */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8,9,10,11,12,13,14,15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
/* Map GCC hard register numbers to the svr4/DWARF debug register numbers
   described in the comment above; -1 means the register has no assigned
   debug number.  NOTE(review): the array's surrounding initializer braces
   are among the lines not visible in this fragment of the file.  */
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
/* Integer argument registers, in call order, for the (non-Microsoft)
   x86-64 calling convention.  */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Integer argument registers, in call order, for the Microsoft x64
   calling convention.  */
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
/* Registers used to return integer values.  NOTE(review): the
   initializer braces of these three arrays are among the lines not
   visible in this fragment of the file.  */
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1729 struct stack_local_entry *next;
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1749 [va_arg registers] (
1750 > to_allocate <- FRAME_POINTER
1762 HOST_WIDE_INT frame;
1764 int outgoing_arguments_size;
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1778 /* Code model option. */
1779 enum cmodel ix86_cmodel;
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1788 /* Which cpu are we scheduling for. */
1789 enum attr_cpu ix86_schedule;
1791 /* Which cpu are we optimizing for. */
1792 enum processor_type ix86_tune;
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1797 /* true if sse prefetch instruction is not NOOP. */
1798 int x86_prefetch_sse;
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1820 /* Alignment for incoming stack boundary in bits specified at
1822 static unsigned int ix86_user_incoming_stack_boundary;
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1830 /* The abi used by target. */
1831 enum calling_abi ix86_abi;
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1836 /* Calling abi specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1843 int ix86_section_threshold = 65536;
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
1847 int internal_label_prefix_len;
1849 /* Fence to use after loop using movnt. */
1852 /* Register class used for passing given 64bit part of the argument.
1853 These represent classes as documented by the PS ABI, with the exception
1854 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1855 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (upper half does contain padding). */
1859 enum x86_64_reg_class
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1874 #define MAX_CLASSES 4
1876 /* Table of constants used by fldpi, fldln2, etc.... */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878 static bool ext_80387_constants_init = 0;
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static int ix86_function_regparm (const_tree, const_tree);
1884 static void ix86_compute_frame_layout (struct ix86_frame *);
1885 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1887 static void ix86_add_new_builtins (int);
1889 enum ix86_function_specific_strings
1891 IX86_FUNCTION_SPECIFIC_ARCH,
1892 IX86_FUNCTION_SPECIFIC_TUNE,
1893 IX86_FUNCTION_SPECIFIC_FPMATH,
1894 IX86_FUNCTION_SPECIFIC_MAX
1897 static char *ix86_target_string (int, int, const char *, const char *,
1898 const char *, bool);
1899 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1900 static void ix86_function_specific_save (struct cl_target_option *);
1901 static void ix86_function_specific_restore (struct cl_target_option *);
1902 static void ix86_function_specific_print (FILE *, int,
1903 struct cl_target_option *);
1904 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1905 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1906 static bool ix86_can_inline_p (tree, tree);
1907 static void ix86_set_current_function (tree);
1909 static enum calling_abi ix86_function_abi (const_tree);
1912 /* The svr4 ABI for the i386 says that records and unions are returned
1914 #ifndef DEFAULT_PCC_STRUCT_RETURN
1915 #define DEFAULT_PCC_STRUCT_RETURN 1
1918 /* Whether -mtune= or -march= were specified */
1919 static int ix86_tune_defaulted;
1920 static int ix86_arch_specified;
1922 /* Bit flags that specify the ISA we are compiling for. */
1923 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1925 /* A mask of ix86_isa_flags that includes bit X if X
1926 was set or cleared on the command line. */
1927 static int ix86_isa_flags_explicit;
1929 /* Define a set of ISAs which are available when a given ISA is
1930 enabled. MMX and SSE ISAs are handled separately. */
1932 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1933 #define OPTION_MASK_ISA_3DNOW_SET \
1934 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1936 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1937 #define OPTION_MASK_ISA_SSE2_SET \
1938 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1939 #define OPTION_MASK_ISA_SSE3_SET \
1940 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1941 #define OPTION_MASK_ISA_SSSE3_SET \
1942 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1943 #define OPTION_MASK_ISA_SSE4_1_SET \
1944 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1945 #define OPTION_MASK_ISA_SSE4_2_SET \
1946 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1947 #define OPTION_MASK_ISA_AVX_SET \
1948 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1949 #define OPTION_MASK_ISA_FMA_SET \
1950 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1952 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1954 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1956 #define OPTION_MASK_ISA_SSE4A_SET \
1957 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1958 #define OPTION_MASK_ISA_SSE5_SET \
1959 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1961 /* AES and PCLMUL need SSE2 because they use xmm registers */
1962 #define OPTION_MASK_ISA_AES_SET \
1963 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1964 #define OPTION_MASK_ISA_PCLMUL_SET \
1965 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1967 #define OPTION_MASK_ISA_ABM_SET \
1968 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1973 /* Define a set of ISAs which aren't available when a given ISA is
1974 disabled. MMX and SSE ISAs are handled separately. */
1976 #define OPTION_MASK_ISA_MMX_UNSET \
1977 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1978 #define OPTION_MASK_ISA_3DNOW_UNSET \
1979 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1982 #define OPTION_MASK_ISA_SSE_UNSET \
1983 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1984 #define OPTION_MASK_ISA_SSE2_UNSET \
1985 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1986 #define OPTION_MASK_ISA_SSE3_UNSET \
1987 (OPTION_MASK_ISA_SSE3 \
1988 | OPTION_MASK_ISA_SSSE3_UNSET \
1989 | OPTION_MASK_ISA_SSE4A_UNSET )
1990 #define OPTION_MASK_ISA_SSSE3_UNSET \
1991 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1992 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1993 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1995 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1996 #define OPTION_MASK_ISA_AVX_UNSET \
1997 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1998 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2000 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2002 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2004 #define OPTION_MASK_ISA_SSE4A_UNSET \
2005 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2006 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2007 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2008 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2009 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2010 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2011 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2012 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2014 /* Vectorization library interface and handlers. */
2015 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2016 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2017 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2019 /* Processor target table, indexed by processor number */
2022 const struct processor_costs *cost; /* Processor costs */
2023 const int align_loop; /* Default alignments. */
2024 const int align_loop_max_skip;
2025 const int align_jump;
2026 const int align_jump_max_skip;
2027 const int align_func;
2030 static const struct ptt processor_target_table[PROCESSOR_max] =
2032 {&i386_cost, 4, 3, 4, 3, 4},
2033 {&i486_cost, 16, 15, 16, 15, 16},
2034 {&pentium_cost, 16, 7, 16, 7, 16},
2035 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2036 {&geode_cost, 0, 0, 0, 0, 0},
2037 {&k6_cost, 32, 7, 32, 7, 32},
2038 {&athlon_cost, 16, 7, 16, 7, 16},
2039 {&pentium4_cost, 0, 0, 0, 0, 0},
2040 {&k8_cost, 16, 7, 16, 7, 16},
2041 {&nocona_cost, 0, 0, 0, 0, 0},
2042 {&core2_cost, 16, 10, 16, 10, 16},
2043 {&generic32_cost, 16, 7, 16, 7, 16},
2044 {&generic64_cost, 16, 10, 16, 10, 16},
2045 {&amdfam10_cost, 32, 24, 32, 7, 32},
2046 {&atom_cost, 16, 7, 16, 7, 16}
2049 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2075 /* Implement TARGET_HANDLE_OPTION. */
/* Process one -m<isa> / -mno-<isa> command-line option.  Enabling an
   ISA ORs its OPTION_MASK_ISA_<FOO>_SET mask (the ISA plus every ISA it
   implies) into ix86_isa_flags; disabling clears the
   OPTION_MASK_ISA_<FOO>_UNSET mask (the ISA plus every ISA that depends
   on it).  In both directions the same mask is also recorded in
   ix86_isa_flags_explicit, so that later -march= defaults do not
   silently override an explicit user choice.  NOTE(review): the
   switch/case scaffolding, the ON/OFF (value) tests and the return
   statements of this function are not visible in this fragment.  */
2078 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* MMX (-mmmx / -mno-mmx).  */
2085 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* 3DNow! (-m3dnow / -mno-3dnow).  */
2098 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* SSE (-msse / -mno-sse).  */
2114 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2115 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2120 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* SSE2 (-msse2 / -mno-sse2).  */
2127 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2128 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2133 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* SSE3 (-msse3 / -mno-sse3).  */
2140 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2146 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* SSSE3 (-mssse3 / -mno-ssse3).  */
2153 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2154 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2159 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* SSE4.1 (-msse4.1 / -mno-sse4.1).  */
2166 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2167 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2172 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* SSE4.2 (-msse4.2 / -mno-sse4.2).  */
2179 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2180 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2185 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* AVX (-mavx / -mno-avx).  */
2192 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2193 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2198 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
/* FMA (-mfma / -mno-fma).  */
2205 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2206 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2211 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
/* SSE4 umbrella option (-msse4 / -mno-sse4).  */
2216 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2217 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2222 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* SSE4A (-msse4a / -mno-sse4a).  */
2228 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* SSE5 (-msse5 / -mno-sse5).  */
2241 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
/* ABM (-mabm / -mno-abm).  */
2254 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2260 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
/* POPCNT (-mpopcnt / -mno-popcnt).  */
2267 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2273 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
/* SAHF (-msahf / -mno-sahf).  */
2280 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2286 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
/* CMPXCHG16B (-mcx16 / -mno-cx16).  */
2293 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2299 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
/* AES (-maes / -mno-aes).  */
2306 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2311 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2312 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
/* PCLMUL (-mpclmul / -mno-pclmul).  */
2319 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2324 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2325 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2334 /* Return a string that documents the current -m options.  The caller is
2335 responsible for freeing the string.  */
/* ISA is the ix86_isa_flags bitmask and FLAGS the target_flags bitmask;
   ARCH, TUNE and FPMATH are the -march=/-mtune=/-mfpmath= argument
   strings.  ADD_NL_P requests line wrapping and "(other ...)" catch-all
   entries for mask bits not matched by the tables below.
   NOTE(review): several interior lines of this function (local variable
   declarations, some braces and the final return) are not visible in
   this fragment of the file.  */
2338 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2339 const char *fpmath, bool add_nl_p)
/* Pairs an option's command-line spelling with its mask bit(s).  */
2341 struct ix86_target_opts
2343 const char *option; /* option string */
2344 int mask; /* isa mask options */
2347 /* This table is ordered so that options which imply preceding options
2348 (such as -msse5 or -msse4.2) are matched first.  */
2349 static struct ix86_target_opts isa_opts[] =
2351 { "-m64", OPTION_MASK_ISA_64BIT },
2352 { "-msse5", OPTION_MASK_ISA_SSE5 },
2353 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2354 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2355 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2356 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2357 { "-msse3", OPTION_MASK_ISA_SSE3 },
2358 { "-msse2", OPTION_MASK_ISA_SSE2 },
2359 { "-msse", OPTION_MASK_ISA_SSE },
2360 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2361 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2362 { "-mmmx", OPTION_MASK_ISA_MMX },
2363 { "-mabm", OPTION_MASK_ISA_ABM },
2364 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2365 { "-maes", OPTION_MASK_ISA_AES },
2366 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
/* Non-ISA target_flags bits, matched against FLAGS.  */
2370 static struct ix86_target_opts flag_opts[] =
2372 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2373 { "-m80387", MASK_80387 },
2374 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2375 { "-malign-double", MASK_ALIGN_DOUBLE },
2376 { "-mcld", MASK_CLD },
2377 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2378 { "-mieee-fp", MASK_IEEE_FP },
2379 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2380 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2381 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2382 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2383 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2384 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2385 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2386 { "-mno-red-zone", MASK_NO_RED_ZONE },
2387 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2388 { "-mrecip", MASK_RECIP },
2389 { "-mrtd", MASK_RTD },
2390 { "-msseregparm", MASK_SSEREGPARM },
2391 { "-mstack-arg-probe", MASK_STACK_PROBE },
2392 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
/* Collected (prefix, argument) string pairs; + 6 leaves room for the
   -march=/-mtune=/-mfpmath= entries and the "(other ...)" fallbacks.  */
2395 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2398 char target_other[40];
2407 memset (opts, '\0', sizeof (opts));
2409 /* Add -march= option. */
2412 opts[num][0] = "-march=";
2413 opts[num++][1] = arch;
2416 /* Add -mtune= option. */
2419 opts[num][0] = "-mtune=";
2420 opts[num++][1] = tune;
2423 /* Pick out the options in isa options. */
2424 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2426 if ((isa & isa_opts[i].mask) != 0)
2428 opts[num++][0] = isa_opts[i].option;
2429 isa &= ~ isa_opts[i].mask;
/* Any ISA bits left unmatched get a hex catch-all entry.  */
2433 if (isa && add_nl_p)
2435 opts[num++][0] = isa_other;
2436 sprintf (isa_other, "(other isa: 0x%x)", isa);
2439 /* Add flag options. */
2440 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2442 if ((flags & flag_opts[i].mask) != 0)
2444 opts[num++][0] = flag_opts[i].option;
2445 flags &= ~ flag_opts[i].mask;
/* NOTE(review): BUG — this formats ISA into the "(other flags: ...)"
   message; by symmetry with the isa_other case above, and given that the
   preceding condition tests FLAGS, the leftover FLAGS bits should be
   printed here instead of ISA.  */
2449 if (flags && add_nl_p)
2451 opts[num++][0] = target_other;
2452 sprintf (target_other, "(other flags: 0x%x)", isa);
2455 /* Add -fpmath= option. */
2458 opts[num][0] = "-mfpmath=";
2459 opts[num++][1] = fpmath;
2466 gcc_assert (num < ARRAY_SIZE (opts));
2468 /* Size the string. */
2470 sep_len = (add_nl_p) ? 3 : 1;
2471 for (i = 0; i < num; i++)
2474 for (j = 0; j < 2; j++)
2476 len += strlen (opts[i][j]);
2479 /* Build the string. */
2480 ret = ptr = (char *) xmalloc (len);
2483 for (i = 0; i < num; i++)
2487 for (j = 0; j < 2; j++)
2488 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
/* Wrap to a new line once the current one would exceed 70 columns.  */
2495 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2503 for (j = 0; j < 2; j++)
2506 memcpy (ptr, opts[i][j], len2[j]);
2508 line_len += len2[j];
/* Sanity check: we must not have written past the allocation.  */
2513 gcc_assert (ret + len >= ptr);
/* Prints the option string built by ix86_target_string for the current
   ix86_isa_flags/target_flags to stderr, or "<no options>" when no
   string was produced.  NOTE(review): the if/else structure, the closing
   of the comment below and any free of OPTS are among the lines not
   visible in this fragment of the file.  */
2518 /* Function that is callable from the debugger to print the current
2521 ix86_debug_options (void)
2523 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2524 ix86_arch_string, ix86_tune_string,
2525 ix86_fpmath_string, true);
2529 fprintf (stderr, "%s\n\n", opts);
2533 fprintf (stderr, "<no options>\n\n");
2538 /* Sometimes certain combinations of command options do not make
2539 sense on a particular target machine. You can define a macro
2540 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2541 defined, is executed once just after all the command options have
2544 Don't use this macro to turn on various extra optimizations for
2545 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2548 override_options (bool main_args_p)
2551 unsigned int ix86_arch_mask, ix86_tune_mask;
2556 /* Comes from final.c -- no real reason to change it. */
2557 #define MAX_CODE_ALIGN 16
2565 PTA_PREFETCH_SSE = 1 << 4,
2567 PTA_3DNOW_A = 1 << 6,
2571 PTA_POPCNT = 1 << 10,
2573 PTA_SSE4A = 1 << 12,
2574 PTA_NO_SAHF = 1 << 13,
2575 PTA_SSE4_1 = 1 << 14,
2576 PTA_SSE4_2 = 1 << 15,
2579 PTA_PCLMUL = 1 << 18,
2586 const char *const name; /* processor name or nickname. */
2587 const enum processor_type processor;
2588 const enum attr_cpu schedule;
2589 const unsigned /*enum pta_flags*/ flags;
2591 const processor_alias_table[] =
2593 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2594 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2595 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2596 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2597 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2598 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2599 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2600 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2601 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2602 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2603 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2604 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2605 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2607 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2609 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2610 PTA_MMX | PTA_SSE | PTA_SSE2},
2611 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2612 PTA_MMX |PTA_SSE | PTA_SSE2},
2613 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2614 PTA_MMX | PTA_SSE | PTA_SSE2},
2615 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2616 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2617 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2618 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2619 | PTA_CX16 | PTA_NO_SAHF},
2620 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2621 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2622 | PTA_SSSE3 | PTA_CX16},
2623 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2624 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2625 | PTA_SSSE3 | PTA_CX16},
2626 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2627 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2628 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2629 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2630 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2631 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2632 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2633 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2634 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2635 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2636 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2637 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2638 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2639 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2640 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2641 {"x86-64", PROCESSOR_K8, CPU_K8,
2642 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2643 {"k8", PROCESSOR_K8, CPU_K8,
2644 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2645 | PTA_SSE2 | PTA_NO_SAHF},
2646 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2647 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2648 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2649 {"opteron", PROCESSOR_K8, CPU_K8,
2650 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2651 | PTA_SSE2 | PTA_NO_SAHF},
2652 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2653 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2654 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2655 {"athlon64", PROCESSOR_K8, CPU_K8,
2656 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2657 | PTA_SSE2 | PTA_NO_SAHF},
2658 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2659 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2660 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2661 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2662 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2663 | PTA_SSE2 | PTA_NO_SAHF},
2664 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2665 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2666 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2667 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2668 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2669 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2670 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2671 0 /* flags are only used for -march switch. */ },
2672 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2673 PTA_64BIT /* flags are only used for -march switch. */ },
2676 int const pta_size = ARRAY_SIZE (processor_alias_table);
2678 /* Set up prefix/suffix so the error messages refer to either the command
2679 line argument, or the attribute(target). */
2688 prefix = "option(\"";
2693 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2694 SUBTARGET_OVERRIDE_OPTIONS;
2697 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2698 SUBSUBTARGET_OVERRIDE_OPTIONS;
2701 /* -fPIC is the default for x86_64. */
2702 if (TARGET_MACHO && TARGET_64BIT)
2705 /* Set the default values for switches whose default depends on TARGET_64BIT
2706 in case they weren't overwritten by command line options. */
2709 /* Mach-O doesn't support omitting the frame pointer for now. */
2710 if (flag_omit_frame_pointer == 2)
2711 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2712 if (flag_asynchronous_unwind_tables == 2)
2713 flag_asynchronous_unwind_tables = 1;
2714 if (flag_pcc_struct_return == 2)
2715 flag_pcc_struct_return = 0;
2719 if (flag_omit_frame_pointer == 2)
2720 flag_omit_frame_pointer = 0;
2721 if (flag_asynchronous_unwind_tables == 2)
2722 flag_asynchronous_unwind_tables = 0;
2723 if (flag_pcc_struct_return == 2)
2724 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2727 /* Need to check -mtune=generic first. */
2728 if (ix86_tune_string)
2730 if (!strcmp (ix86_tune_string, "generic")
2731 || !strcmp (ix86_tune_string, "i686")
2732 /* As special support for cross compilers we read -mtune=native
2733 as -mtune=generic. With native compilers we won't see the
2734 -mtune=native, as it was changed by the driver. */
2735 || !strcmp (ix86_tune_string, "native"))
2738 ix86_tune_string = "generic64";
2740 ix86_tune_string = "generic32";
2742 /* If this call is for setting the option attribute, allow the
2743 generic32/generic64 that was previously set. */
2744 else if (!main_args_p
2745 && (!strcmp (ix86_tune_string, "generic32")
2746 || !strcmp (ix86_tune_string, "generic64")))
2748 else if (!strncmp (ix86_tune_string, "generic", 7))
2749 error ("bad value (%s) for %stune=%s %s",
2750 ix86_tune_string, prefix, suffix, sw);
2754 if (ix86_arch_string)
2755 ix86_tune_string = ix86_arch_string;
2756 if (!ix86_tune_string)
2758 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2759 ix86_tune_defaulted = 1;
2762 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2763 need to use a sensible tune option. */
2764 if (!strcmp (ix86_tune_string, "generic")
2765 || !strcmp (ix86_tune_string, "x86-64")
2766 || !strcmp (ix86_tune_string, "i686"))
2769 ix86_tune_string = "generic64";
2771 ix86_tune_string = "generic32";
2774 if (ix86_stringop_string)
2776 if (!strcmp (ix86_stringop_string, "rep_byte"))
2777 stringop_alg = rep_prefix_1_byte;
2778 else if (!strcmp (ix86_stringop_string, "libcall"))
2779 stringop_alg = libcall;
2780 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2781 stringop_alg = rep_prefix_4_byte;
2782 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2784 /* rep; movq isn't available in 32-bit code. */
2785 stringop_alg = rep_prefix_8_byte;
2786 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2787 stringop_alg = loop_1_byte;
2788 else if (!strcmp (ix86_stringop_string, "loop"))
2789 stringop_alg = loop;
2790 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2791 stringop_alg = unrolled_loop;
2793 error ("bad value (%s) for %sstringop-strategy=%s %s",
2794 ix86_stringop_string, prefix, suffix, sw);
2796 if (!strcmp (ix86_tune_string, "x86-64"))
2797 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2798 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2799 prefix, suffix, prefix, suffix, prefix, suffix);
2801 if (!ix86_arch_string)
2802 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2804 ix86_arch_specified = 1;
2806 if (!strcmp (ix86_arch_string, "generic"))
2807 error ("generic CPU can be used only for %stune=%s %s",
2808 prefix, suffix, sw);
2809 if (!strncmp (ix86_arch_string, "generic", 7))
2810 error ("bad value (%s) for %sarch=%s %s",
2811 ix86_arch_string, prefix, suffix, sw);
2813 /* Validate -mabi= value. */
2814 if (ix86_abi_string)
2816 if (strcmp (ix86_abi_string, "sysv") == 0)
2817 ix86_abi = SYSV_ABI;
2818 else if (strcmp (ix86_abi_string, "ms") == 0)
2821 error ("unknown ABI (%s) for %sabi=%s %s",
2822 ix86_abi_string, prefix, suffix, sw);
2825 ix86_abi = DEFAULT_ABI;
2827 if (ix86_cmodel_string != 0)
2829 if (!strcmp (ix86_cmodel_string, "small"))
2830 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2831 else if (!strcmp (ix86_cmodel_string, "medium"))
2832 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2833 else if (!strcmp (ix86_cmodel_string, "large"))
2834 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2836 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2837 else if (!strcmp (ix86_cmodel_string, "32"))
2838 ix86_cmodel = CM_32;
2839 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2840 ix86_cmodel = CM_KERNEL;
2842 error ("bad value (%s) for %scmodel=%s %s",
2843 ix86_cmodel_string, prefix, suffix, sw);
2847 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2848 use of rip-relative addressing. This eliminates fixups that
2849 would otherwise be needed if this object is to be placed in a
2850 DLL, and is essentially just as efficient as direct addressing. */
2851 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2852 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2853 else if (TARGET_64BIT)
2854 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2856 ix86_cmodel = CM_32;
2858 if (ix86_asm_string != 0)
2861 && !strcmp (ix86_asm_string, "intel"))
2862 ix86_asm_dialect = ASM_INTEL;
2863 else if (!strcmp (ix86_asm_string, "att"))
2864 ix86_asm_dialect = ASM_ATT;
2866 error ("bad value (%s) for %sasm=%s %s",
2867 ix86_asm_string, prefix, suffix, sw);
2869 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2870 error ("code model %qs not supported in the %s bit mode",
2871 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2872 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2873 sorry ("%i-bit mode not compiled in",
2874 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2876 for (i = 0; i < pta_size; i++)
2877 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2879 ix86_schedule = processor_alias_table[i].schedule;
2880 ix86_arch = processor_alias_table[i].processor;
2881 /* Default cpu tuning to the architecture. */
2882 ix86_tune = ix86_arch;
2884 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2885 error ("CPU you selected does not support x86-64 "
2888 if (processor_alias_table[i].flags & PTA_MMX
2889 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2890 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2891 if (processor_alias_table[i].flags & PTA_3DNOW
2892 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2893 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2894 if (processor_alias_table[i].flags & PTA_3DNOW_A
2895 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2896 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2897 if (processor_alias_table[i].flags & PTA_SSE
2898 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2899 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2900 if (processor_alias_table[i].flags & PTA_SSE2
2901 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2902 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2903 if (processor_alias_table[i].flags & PTA_SSE3
2904 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2905 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2906 if (processor_alias_table[i].flags & PTA_SSSE3
2907 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2908 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2909 if (processor_alias_table[i].flags & PTA_SSE4_1
2910 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2911 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2912 if (processor_alias_table[i].flags & PTA_SSE4_2
2913 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2914 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2915 if (processor_alias_table[i].flags & PTA_AVX
2916 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2917 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2918 if (processor_alias_table[i].flags & PTA_FMA
2919 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2920 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2921 if (processor_alias_table[i].flags & PTA_SSE4A
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2923 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2924 if (processor_alias_table[i].flags & PTA_SSE5
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2926 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2927 if (processor_alias_table[i].flags & PTA_ABM
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2929 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2930 if (processor_alias_table[i].flags & PTA_CX16
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2932 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2933 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2935 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2936 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2939 if (processor_alias_table[i].flags & PTA_AES
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2941 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2942 if (processor_alias_table[i].flags & PTA_PCLMUL
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2944 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2945 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2946 x86_prefetch_sse = true;
2952 error ("bad value (%s) for %sarch=%s %s",
2953 ix86_arch_string, prefix, suffix, sw);
2955 ix86_arch_mask = 1u << ix86_arch;
2956 for (i = 0; i < X86_ARCH_LAST; ++i)
2957 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2959 for (i = 0; i < pta_size; i++)
2960 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2962 ix86_schedule = processor_alias_table[i].schedule;
2963 ix86_tune = processor_alias_table[i].processor;
2964 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2966 if (ix86_tune_defaulted)
2968 ix86_tune_string = "x86-64";
2969 for (i = 0; i < pta_size; i++)
2970 if (! strcmp (ix86_tune_string,
2971 processor_alias_table[i].name))
2973 ix86_schedule = processor_alias_table[i].schedule;
2974 ix86_tune = processor_alias_table[i].processor;
2977 error ("CPU you selected does not support x86-64 "
2980 /* Intel CPUs have always interpreted SSE prefetch instructions as
2981 NOPs; so, we can enable SSE prefetch instructions even when
2982 -mtune (rather than -march) points us to a processor that has them.
2983 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2984 higher processors. */
2986 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2987 x86_prefetch_sse = true;
2991 error ("bad value (%s) for %stune=%s %s",
2992 ix86_tune_string, prefix, suffix, sw);
2994 ix86_tune_mask = 1u << ix86_tune;
2995 for (i = 0; i < X86_TUNE_LAST; ++i)
2996 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2999 ix86_cost = &ix86_size_cost;
3001 ix86_cost = processor_target_table[ix86_tune].cost;
3003 /* Arrange to set up i386_stack_locals for all functions. */
3004 init_machine_status = ix86_init_machine_status;
3006 /* Validate -mregparm= value. */
3007 if (ix86_regparm_string)
3010 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3011 i = atoi (ix86_regparm_string);
3012 if (i < 0 || i > REGPARM_MAX)
3013 error ("%sregparm=%d%s is not between 0 and %d",
3014 prefix, i, suffix, REGPARM_MAX);
3019 ix86_regparm = REGPARM_MAX;
3021 /* If the user has provided any of the -malign-* options,
3022 warn and use that value only if -falign-* is not set.
3023 Remove this code in GCC 3.2 or later. */
3024 if (ix86_align_loops_string)
3026 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3027 prefix, suffix, suffix);
3028 if (align_loops == 0)
3030 i = atoi (ix86_align_loops_string);
3031 if (i < 0 || i > MAX_CODE_ALIGN)
3032 error ("%salign-loops=%d%s is not between 0 and %d",
3033 prefix, i, suffix, MAX_CODE_ALIGN);
3035 align_loops = 1 << i;
3039 if (ix86_align_jumps_string)
3041 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3042 prefix, suffix, suffix);
3043 if (align_jumps == 0)
3045 i = atoi (ix86_align_jumps_string);
3046 if (i < 0 || i > MAX_CODE_ALIGN)
3047 error ("%salign-loops=%d%s is not between 0 and %d",
3048 prefix, i, suffix, MAX_CODE_ALIGN);
3050 align_jumps = 1 << i;
3054 if (ix86_align_funcs_string)
3056 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3057 prefix, suffix, suffix);
3058 if (align_functions == 0)
3060 i = atoi (ix86_align_funcs_string);
3061 if (i < 0 || i > MAX_CODE_ALIGN)
3062 error ("%salign-loops=%d%s is not between 0 and %d",
3063 prefix, i, suffix, MAX_CODE_ALIGN);
3065 align_functions = 1 << i;
3069 /* Default align_* from the processor table. */
3070 if (align_loops == 0)
3072 align_loops = processor_target_table[ix86_tune].align_loop;
3073 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3075 if (align_jumps == 0)
3077 align_jumps = processor_target_table[ix86_tune].align_jump;
3078 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3080 if (align_functions == 0)
3082 align_functions = processor_target_table[ix86_tune].align_func;
3085 /* Validate -mbranch-cost= value, or provide default. */
3086 ix86_branch_cost = ix86_cost->branch_cost;
3087 if (ix86_branch_cost_string)
3089 i = atoi (ix86_branch_cost_string);
3091 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3093 ix86_branch_cost = i;
3095 if (ix86_section_threshold_string)
3097 i = atoi (ix86_section_threshold_string);
3099 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3101 ix86_section_threshold = i;
3104 if (ix86_tls_dialect_string)
3106 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3107 ix86_tls_dialect = TLS_DIALECT_GNU;
3108 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3109 ix86_tls_dialect = TLS_DIALECT_GNU2;
3110 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3111 ix86_tls_dialect = TLS_DIALECT_SUN;
3113 error ("bad value (%s) for %stls-dialect=%s %s",
3114 ix86_tls_dialect_string, prefix, suffix, sw);
3117 if (ix87_precision_string)
3119 i = atoi (ix87_precision_string);
3120 if (i != 32 && i != 64 && i != 80)
3121 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3126 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3128 /* Enable by default the SSE and MMX builtins. Do allow the user to
3129 explicitly disable any of these. In particular, disabling SSE and
3130 MMX for kernel code is extremely useful. */
3131 if (!ix86_arch_specified)
3133 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3134 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3137 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3141 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3143 if (!ix86_arch_specified)
3145 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3147 /* i386 ABI does not specify red zone. It still makes sense to use it
3148 when programmer takes care to stack from being destroyed. */
3149 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3150 target_flags |= MASK_NO_RED_ZONE;
3153 /* Keep nonleaf frame pointers. */
3154 if (flag_omit_frame_pointer)
3155 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3156 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3157 flag_omit_frame_pointer = 1;
3159 /* If we're doing fast math, we don't care about comparison order
3160 wrt NaNs. This lets us use a shorter comparison sequence. */
3161 if (flag_finite_math_only)
3162 target_flags &= ~MASK_IEEE_FP;
3164 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3165 since the insns won't need emulation. */
3166 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3167 target_flags &= ~MASK_NO_FANCY_MATH_387;
3169 /* Likewise, if the target doesn't have a 387, or we've specified
3170 software floating point, don't use 387 inline intrinsics. */
3172 target_flags |= MASK_NO_FANCY_MATH_387;
3174 /* Turn on MMX builtins for -msse. */
3177 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3178 x86_prefetch_sse = true;
3181 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3182 if (TARGET_SSE4_2 || TARGET_ABM)
3183 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3185 /* Validate -mpreferred-stack-boundary= value or default it to
3186 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3187 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3188 if (ix86_preferred_stack_boundary_string)
3190 i = atoi (ix86_preferred_stack_boundary_string);
3191 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3192 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3193 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3195 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3198 /* Set the default value for -mstackrealign. */
3199 if (ix86_force_align_arg_pointer == -1)
3200 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3202 /* Validate -mincoming-stack-boundary= value or default it to
3203 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3204 if (ix86_force_align_arg_pointer)
3205 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3207 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3208 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3209 if (ix86_incoming_stack_boundary_string)
3211 i = atoi (ix86_incoming_stack_boundary_string);
3212 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3213 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3214 i, TARGET_64BIT ? 4 : 2);
3217 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3218 ix86_incoming_stack_boundary
3219 = ix86_user_incoming_stack_boundary;
3223 /* Accept -msseregparm only if at least SSE support is enabled. */
3224 if (TARGET_SSEREGPARM
3226 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3228 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3229 if (ix86_fpmath_string != 0)
3231 if (! strcmp (ix86_fpmath_string, "387"))
3232 ix86_fpmath = FPMATH_387;
3233 else if (! strcmp (ix86_fpmath_string, "sse"))
3237 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3238 ix86_fpmath = FPMATH_387;
3241 ix86_fpmath = FPMATH_SSE;
3243 else if (! strcmp (ix86_fpmath_string, "387,sse")
3244 || ! strcmp (ix86_fpmath_string, "387+sse")
3245 || ! strcmp (ix86_fpmath_string, "sse,387")
3246 || ! strcmp (ix86_fpmath_string, "sse+387")
3247 || ! strcmp (ix86_fpmath_string, "both"))
3251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3252 ix86_fpmath = FPMATH_387;
3254 else if (!TARGET_80387)
3256 warning (0, "387 instruction set disabled, using SSE arithmetics");
3257 ix86_fpmath = FPMATH_SSE;
3260 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3263 error ("bad value (%s) for %sfpmath=%s %s",
3264 ix86_fpmath_string, prefix, suffix, sw);
3267 /* If the i387 is disabled, then do not return values in it. */
3269 target_flags &= ~MASK_FLOAT_RETURNS;
3271 /* Use external vectorized library in vectorizing intrinsics. */
3272 if (ix86_veclibabi_string)
3274 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3275 ix86_veclib_handler = ix86_veclibabi_svml;
3276 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3277 ix86_veclib_handler = ix86_veclibabi_acml;
3279 error ("unknown vectorization library ABI type (%s) for "
3280 "%sveclibabi=%s %s", ix86_veclibabi_string,
3281 prefix, suffix, sw);
3284 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3285 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3287 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3289 /* ??? Unwind info is not correct around the CFG unless either a frame
3290 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3291 unwind info generation to be aware of the CFG and propagating states
3293 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3294 || flag_exceptions || flag_non_call_exceptions)
3295 && flag_omit_frame_pointer
3296 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3298 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3299 warning (0, "unwind tables currently require either a frame pointer "
3300 "or %saccumulate-outgoing-args%s for correctness",
3302 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3305 /* If stack probes are required, the space used for large function
3306 arguments on the stack must also be probed, so enable
3307 -maccumulate-outgoing-args so this happens in the prologue. */
3308 if (TARGET_STACK_PROBE
3309 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3311 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3312 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3313 "for correctness", prefix, suffix);
3314 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3317 /* For sane SSE instruction set generation we need fcomi instruction.
3318 It is safe to enable all CMOVE instructions. */
3322 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3325 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3326 p = strchr (internal_label_prefix, 'X');
3327 internal_label_prefix_len = p - internal_label_prefix;
3331 /* When scheduling description is not available, disable scheduler pass
3332 so it won't slow down the compilation and make x87 code slower. */
3333 if (!TARGET_SCHEDULE)
3334 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3336 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3337 set_param_value ("simultaneous-prefetches",
3338 ix86_cost->simultaneous_prefetches);
3339 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3340 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3341 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3342 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3343 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3344 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3346 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3347 can be optimized to ap = __builtin_next_arg (0). */
3349 targetm.expand_builtin_va_start = NULL;
3353 ix86_gen_leave = gen_leave_rex64;
3354 ix86_gen_pop1 = gen_popdi1;
3355 ix86_gen_add3 = gen_adddi3;
3356 ix86_gen_sub3 = gen_subdi3;
3357 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3358 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3359 ix86_gen_monitor = gen_sse3_monitor64;
3360 ix86_gen_andsp = gen_anddi3;
3364 ix86_gen_leave = gen_leave;
3365 ix86_gen_pop1 = gen_popsi1;
3366 ix86_gen_add3 = gen_addsi3;
3367 ix86_gen_sub3 = gen_subsi3;
3368 ix86_gen_sub3_carry = gen_subsi3_carry;
3369 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3370 ix86_gen_monitor = gen_sse3_monitor;
3371 ix86_gen_andsp = gen_andsi3;
3375 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3377 target_flags |= MASK_CLD & ~target_flags_explicit;
3380 /* Save the initial options in case the user does function specific options */
3382 target_option_default_node = target_option_current_node
3383 = build_target_option_node ();
3386 /* Save the current options */
3389 ix86_function_specific_save (struct cl_target_option *ptr)
/* Save the current global target-option state into PTR so that
   ix86_function_specific_restore can reinstate it later.
   The asserts guard the narrowing stores below: each value must fit in
   0..255 — presumably the save-area fields are byte-sized; the struct is
   declared elsewhere, so confirm there.  */
3391 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3392 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3393 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3394 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3395 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
/* Straight copies of the option globals into the save structure.  */
3397 ptr->arch = ix86_arch;
3398 ptr->schedule = ix86_schedule;
3399 ptr->tune = ix86_tune;
3400 ptr->fpmath = ix86_fpmath;
3401 ptr->branch_cost = ix86_branch_cost;
3402 ptr->tune_defaulted = ix86_tune_defaulted;
3403 ptr->arch_specified = ix86_arch_specified;
3404 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3405 ptr->target_flags_explicit = target_flags_explicit;
3408 /* Restore the current options */
3411 ix86_function_specific_restore (struct cl_target_option *ptr)
/* Restore the global target-option state previously captured in PTR by
   ix86_function_specific_save.  The old arch/tune values are remembered
   so the derived feature tables are only rebuilt when they changed.  */
3413 enum processor_type old_tune = ix86_tune;
3414 enum processor_type old_arch = ix86_arch;
3415 unsigned int ix86_arch_mask, ix86_tune_mask;
/* Copy back, casting the (narrow) saved fields to their enum types.  */
3418 ix86_arch = (enum processor_type) ptr->arch;
3419 ix86_schedule = (enum attr_cpu) ptr->schedule;
3420 ix86_tune = (enum processor_type) ptr->tune;
3421 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3422 ix86_branch_cost = ptr->branch_cost;
3423 ix86_tune_defaulted = ptr->tune_defaulted;
3424 ix86_arch_specified = ptr->arch_specified;
3425 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3426 target_flags_explicit = ptr->target_flags_explicit;
3428 /* Recreate the arch feature tests if the arch changed.  Each
   ix86_arch_features[i] becomes a 0/1 flag: whether the restored arch's
   bit is set in the corresponding initial_ix86_arch_features mask.  */
3429 if (old_arch != ix86_arch)
3431 ix86_arch_mask = 1u << ix86_arch;
3432 for (i = 0; i < X86_ARCH_LAST; ++i)
3433 ix86_arch_features[i]
3434 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3437 /* Recreate the tune optimization tests (same scheme as the arch
   features above, keyed on the restored tune processor).  */
3438 if (old_tune != ix86_tune)
3440 ix86_tune_mask = 1u << ix86_tune;
3441 for (i = 0; i < X86_TUNE_LAST; ++i)
3442 ix86_tune_features[i]
3443 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3447 /* Print the current options */
3450 ix86_function_specific_print (FILE *file, int indent,
3451 struct cl_target_option *ptr)
/* Dump the saved options in PTR to FILE, each line indented by INDENT.
   The full option string is reconstructed by ix86_target_string.  */
3454 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3455 NULL, NULL, NULL, false);
/* Print the numeric arch id, with its name when it indexes into
   cpu_names (i.e. is below TARGET_CPU_DEFAULT_max).  */
3457 fprintf (file, "%*sarch = %d (%s)\n",
3460 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3461 ? cpu_names[ptr->arch]
/* Same for the tune processor.  */
3464 fprintf (file, "%*stune = %d (%s)\n",
3467 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3468 ? cpu_names[ptr->tune]
/* fpmath is a bitmask; annotate which of 387/SSE are enabled.  */
3471 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3472 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3473 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3474 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3478 fprintf (file, "%*s%s\n", indent, "", target_string);
/* ix86_target_string presumably returns heap memory — freed here; confirm
   against its definition.  */
3479 free (target_string);
3484 /* Inner function to process the attribute((target(...))), take an argument and
3485 set the current options from the argument. If we have a list, recursively go
3489 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Parse one attribute((target("..."))) argument (or a TREE_LIST of them),
   updating ix86_isa_flags / target_flags in place and stashing any
   string-valued options (arch=/tune=/fpmath=) into P_STRINGS.
   Table-entry builders: S = option name, O = its OPT_* enumerator,
   M = target_flags mask (0 where unused).  sizeof (S)-1 precomputes the
   name length used by the prefix matching below.  */
3494 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3495 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3496 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3497 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3512 enum ix86_opt_type type;
/* ISA options — handled through ix86_handle_option below.  */
3517 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3518 IX86_ATTR_ISA ("abm", OPT_mabm),
3519 IX86_ATTR_ISA ("aes", OPT_maes),
3520 IX86_ATTR_ISA ("avx", OPT_mavx),
3521 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3522 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3523 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3524 IX86_ATTR_ISA ("sse", OPT_msse),
3525 IX86_ATTR_ISA ("sse2", OPT_msse2),
3526 IX86_ATTR_ISA ("sse3", OPT_msse3),
3527 IX86_ATTR_ISA ("sse4", OPT_msse4),
3528 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3529 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3530 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3531 IX86_ATTR_ISA ("sse5", OPT_msse5),
3532 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3534 /* string options */
3535 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3536 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3537 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
/* Flag options that set (ix86_opt_yes) or clear (ix86_opt_no) a
   target_flags mask.  */
3540 IX86_ATTR_YES ("cld",
3544 IX86_ATTR_NO ("fancy-math-387",
3545 OPT_mfancy_math_387,
3546 MASK_NO_FANCY_MATH_387),
3548 IX86_ATTR_NO ("fused-madd",
3550 MASK_NO_FUSED_MADD),
3552 IX86_ATTR_YES ("ieee-fp",
3556 IX86_ATTR_YES ("inline-all-stringops",
3557 OPT_minline_all_stringops,
3558 MASK_INLINE_ALL_STRINGOPS),
3560 IX86_ATTR_YES ("inline-stringops-dynamically",
3561 OPT_minline_stringops_dynamically,
3562 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3564 IX86_ATTR_NO ("align-stringops",
3565 OPT_mno_align_stringops,
3566 MASK_NO_ALIGN_STRINGOPS),
3568 IX86_ATTR_YES ("recip",
3574 /* If this is a list, recurse to get the options. */
3575 if (TREE_CODE (args) == TREE_LIST)
3579 for (; args; args = TREE_CHAIN (args))
3580 if (TREE_VALUE (args)
3581 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3587 else if (TREE_CODE (args) != STRING_CST)
3590 /* Handle multiple arguments separated by commas. */
3591 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3593 while (next_optstr && *next_optstr != '\0')
3595 char *p = next_optstr;
3597 char *comma = strchr (next_optstr, ',');
3598 const char *opt_string;
3599 size_t len, opt_len;
3604 enum ix86_opt_type type = ix86_opt_unknown;
/* A comma was found: limit this token to the text before it and advance
   the cursor past it for the next iteration.  */
3610 len = comma - next_optstr;
3611 next_optstr = comma + 1;
3619 /* Recognize no-xxx. */
3620 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3629 /* Find the option. */
3632 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3634 type = attrs[i].type;
3635 opt_len = attrs[i].len;
/* Cheap first-character check, then: string options match as a proper
   prefix (len > opt_len, value follows), all others must match exactly.  */
3636 if (ch == attrs[i].string[0]
3637 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3638 && memcmp (p, attrs[i].string, opt_len) == 0)
3641 mask = attrs[i].mask;
3642 opt_string = attrs[i].string;
3647 /* Process the option. */
3650 error ("attribute(target(\"%s\")) is unknown", orig_p);
3654 else if (type == ix86_opt_isa)
3655 ix86_handle_option (opt, p, opt_set_p);
3657 else if (type == ix86_opt_yes || type == ix86_opt_no)
/* ix86_opt_no entries invert the sense ("no-" prefix handling above may
   have inverted it once already).  */
3659 if (type == ix86_opt_no)
3660 opt_set_p = !opt_set_p;
3663 target_flags |= mask;
3665 target_flags &= ~mask;
3668 else if (type == ix86_opt_str)
/* String options may only be given once per attribute.  */
3672 error ("option(\"%s\") was already specified", opt_string);
/* Save the value part (after "arch=" etc.); caller owns and frees it.  */
3676 p_strings[opt] = xstrdup (p + opt_len);
3686 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3689 ix86_valid_target_attribute_tree (tree args)
/* Build a TARGET_OPTION_NODE for the attribute arguments in ARGS, or fail.
   Remember the original global option strings so they can be put back
   after override_options has been rerun with the attribute's values.  */
3691 const char *orig_arch_string = ix86_arch_string;
3692 const char *orig_tune_string = ix86_tune_string;
3693 const char *orig_fpmath_string = ix86_fpmath_string;
3694 int orig_tune_defaulted = ix86_tune_defaulted;
3695 int orig_arch_specified = ix86_arch_specified;
3696 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3699 struct cl_target_option *def
3700 = TREE_TARGET_OPTION (target_option_default_node);
3702 /* Process each of the options on the chain. */
3703 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3706 /* If the changed options are different from the default, rerun override_options,
3707 and then save the options away. The string options are attribute options,
3708 and will be undone when we copy the save structure. */
3709 if (ix86_isa_flags != def->ix86_isa_flags
3710 || target_flags != def->target_flags
3711 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3712 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3713 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3715 /* If we are using the default tune= or arch=, undo the string assigned,
3716 and use the default. */
3717 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3718 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3719 else if (!orig_arch_specified)
3720 ix86_arch_string = NULL;
3722 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3723 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3724 else if (orig_tune_defaulted)
3725 ix86_tune_string = NULL;
3727 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3728 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3729 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3730 else if (!TARGET_64BIT && TARGET_SSE)
3731 ix86_fpmath_string = "sse,387";
3733 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3734 override_options (false);
3736 /* Add any builtin functions with the new isa if any. */
3737 ix86_add_new_builtins (ix86_isa_flags);
3739 /* Save the current options unless we are validating options for
3741 t = build_target_option_node ();
/* Put the original global strings back; the attribute's copies live on
   only inside the saved option node.  */
3743 ix86_arch_string = orig_arch_string;
3744 ix86_tune_string = orig_tune_string;
3745 ix86_fpmath_string = orig_fpmath_string;
3747 /* Free up memory allocated to hold the strings.
   (NOTE(review): the NULL guard is redundant — free (NULL) is a no-op.)  */
3748 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3749 if (option_strings[i])
3750 free (option_strings[i]);
3756 /* Hook to validate attribute((target("string"))). */
3759 ix86_valid_target_attribute_p (tree fndecl,
3760 tree ARG_UNUSED (name),
3762 int ARG_UNUSED (flags))
/* Target hook: validate attribute((target("string"))) on FNDECL and
   attach the resulting option/optimization nodes to the decl.  The
   current global option state is saved and restored around the check so
   validation has no lasting side effects.  */
3764 struct cl_target_option cur_target;
3766 tree old_optimize = build_optimization_node ();
3767 tree new_target, new_optimize;
3768 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3770 /* If the function changed the optimization levels as well as setting target
3771 options, start with the optimizations specified. */
3772 if (func_optimize && func_optimize != old_optimize)
3773 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3775 /* The target attributes may also change some optimization flags, so update
3776 the optimization options if necessary. */
3777 cl_target_option_save (&cur_target);
3778 new_target = ix86_valid_target_attribute_tree (args);
3779 new_optimize = build_optimization_node ();
3786 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
/* Only record a per-function optimization node when it differs from the
   ambient one.  */
3788 if (old_optimize != new_optimize)
3789 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made during validation.  */
3792 cl_target_option_restore (&cur_target);
3794 if (old_optimize != new_optimize)
3795 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3801 /* Hook to determine if one function can safely inline another. */
3804 ix86_can_inline_p (tree caller, tree callee)
/* Target hook: decide whether CALLEE may be inlined into CALLER given
   their per-function target options.  */
3807 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3808 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3810 /* If callee has no option attributes, then it is ok to inline. */
3814 /* If caller has no option attributes, but callee does then it is not ok to
3816 else if (!caller_tree)
3821 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3822 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3824 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3825 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3827 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3828 != callee_opts->ix86_isa_flags)
3831 /* See if we have the same non-isa options. */
3832 else if (caller_opts->target_flags != callee_opts->target_flags)
3835 /* See if arch, tune, etc. are the same. */
3836 else if (caller_opts->arch != callee_opts->arch)
3839 else if (caller_opts->tune != callee_opts->tune)
3842 else if (caller_opts->fpmath != callee_opts->fpmath)
3845 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3856 /* Remember the last target of ix86_set_current_function. */
3857 static GTY(()) tree ix86_previous_fndecl;
3859 /* Establish appropriate back-end context for processing the function
3860 FNDECL. The argument might be NULL to indicate processing at top
3861 level, outside of any function scope. */
3863 ix86_set_current_function (tree fndecl)
/* Switch the back end's global option state to the options of FNDECL.
   Caches the last decl in ix86_previous_fndecl to avoid redundant
   restores (this hook fires many times per function).  */
3865 /* Only change the context if the function changes. This hook is called
3866 several times in the course of compiling a function, and we don't want to
3867 slow things down too much or call target_reinit when it isn't safe. */
3868 if (fndecl && fndecl != ix86_previous_fndecl)
3870 tree old_tree = (ix86_previous_fndecl
3871 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3874 tree new_tree = (fndecl
3875 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3878 ix86_previous_fndecl = fndecl;
/* Same target node as before: nothing to do.  */
3879 if (old_tree == new_tree)
/* New function carries its own target options: make them current.  */
3884 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
/* Otherwise fall back to the options current at file scope.  */
3890 struct cl_target_option *def
3891 = TREE_TARGET_OPTION (target_option_current_node);
3893 cl_target_option_restore (def);
3900 /* Return true if this goes in large data/bss. */
3903 ix86_in_large_data_p (tree exp)
/* Only the medium code models split data into regular and large
   (.ldata/.lbss) sections; every other model answers false.  */
3905 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3908 /* Functions are never large data. */
3909 if (TREE_CODE (exp) == FUNCTION_DECL)
/* A variable with an explicit section attribute is "large" exactly when
   that section is one of the large-data sections.  */
3912 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3914 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3915 if (strcmp (section, ".ldata") == 0
3916 || strcmp (section, ".lbss") == 0)
/* Otherwise decide by size against -mlarge-data-threshold.  */
3922 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3924 /* If this is an incomplete type with size 0, then we can't put it
3925 in data because it might be too big when completed. */
3926 if (!size || size > ix86_section_threshold)
3933 /* Switch to the appropriate section for output of DECL.
3934 DECL is either a `VAR_DECL' node or a constant of some sort.
3935 RELOC indicates whether forming the initial value of DECL requires
3936 link-time relocations. */
3938 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* TARGET_ASM_SELECT_SECTION for x86-64 ELF: route large-data objects in
   the medium code models into the .l* section family; everything else is
   delegated to default_elf_select_section.  */
3942 x86_64_elf_select_section (tree decl, int reloc,
3943 unsigned HOST_WIDE_INT align)
3945 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3946 && ix86_in_large_data_p (decl))
3948 const char *sname = NULL;
3949 unsigned int flags = SECTION_WRITE;
/* Map the categorized decl to the matching large-data section name.  */
3950 switch (categorize_decl_for_section (decl, reloc))
3955 case SECCAT_DATA_REL:
3956 sname = ".ldata.rel";
3958 case SECCAT_DATA_REL_LOCAL:
3959 sname = ".ldata.rel.local";
3961 case SECCAT_DATA_REL_RO:
3962 sname = ".ldata.rel.ro";
3964 case SECCAT_DATA_REL_RO_LOCAL:
3965 sname = ".ldata.rel.ro.local";
3969 flags |= SECTION_BSS;
3972 case SECCAT_RODATA_MERGE_STR:
3973 case SECCAT_RODATA_MERGE_STR_INIT:
3974 case SECCAT_RODATA_MERGE_CONST:
3978 case SECCAT_SRODATA:
3985 /* We don't split these for medium model. Place them into
3986 default sections and hope for best. */
3988 case SECCAT_EMUTLS_VAR:
3989 case SECCAT_EMUTLS_TMPL:
3994 /* We might get called with string constants, but get_named_section
3995 doesn't like them as they are not DECLs. Also, we need to set
3996 flags in that case. */
3998 return get_section (sname, flags, NULL);
3999 return get_named_section (decl, sname, reloc);
/* Not large data (or not a medium model): use the generic ELF logic.  */
4002 return default_elf_select_section (decl, reloc, align);
4005 /* Build up a unique section name, expressed as a
4006 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4007 RELOC indicates whether the initial value of EXP requires
4008 link-time relocations. */
4010 static void ATTRIBUTE_UNUSED
4011 x86_64_elf_unique_section (tree decl, int reloc)
/* TARGET_ASM_UNIQUE_SECTION for x86-64 ELF: build a per-decl section
   name (large-data .l* prefix, optional .gnu.linkonce, plus the decl's
   assembler name) and store it in DECL_SECTION_NAME.  Non-large decls
   fall through to default_unique_section.  */
4013 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4014 && ix86_in_large_data_p (decl))
4016 const char *prefix = NULL;
4017 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4018 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Pick the large-section prefix; one_only decls get the abbreviated
   form that pairs with the .gnu.linkonce prefix added below.  */
4020 switch (categorize_decl_for_section (decl, reloc))
4023 case SECCAT_DATA_REL:
4024 case SECCAT_DATA_REL_LOCAL:
4025 case SECCAT_DATA_REL_RO:
4026 case SECCAT_DATA_REL_RO_LOCAL:
4027 prefix = one_only ? ".ld" : ".ldata";
4030 prefix = one_only ? ".lb" : ".lbss";
4033 case SECCAT_RODATA_MERGE_STR:
4034 case SECCAT_RODATA_MERGE_STR_INIT:
4035 case SECCAT_RODATA_MERGE_CONST:
4036 prefix = one_only ? ".lr" : ".lrodata";
4038 case SECCAT_SRODATA:
4045 /* We don't split these for medium model. Place them into
4046 default sections and hope for best. */
4048 case SECCAT_EMUTLS_VAR:
4049 prefix = targetm.emutls.var_section;
4051 case SECCAT_EMUTLS_TMPL:
4052 prefix = targetm.emutls.tmpl_section;
4057 const char *name, *linkonce;
/* Use the decl's assembler name, with any target encoding stripped.  */
4060 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4061 name = targetm.strip_name_encoding (name);
4063 /* If we're using one_only, then there needs to be a .gnu.linkonce
4064 prefix to the section name. */
4065 linkonce = one_only ? ".gnu.linkonce" : "";
4067 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4069 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4076 #ifdef COMMON_ASM_OP
4077 /* This says how to output assembler code to declare an
4078 uninitialized external linkage data object.
4080 For medium model x86-64 we need to use .largecomm opcode for
4083 x86_elf_aligned_common (FILE *file,
4084 const char *name, unsigned HOST_WIDE_INT size,
/* Objects above the -mlarge-data-threshold in the medium model are
   emitted with .largecomm; otherwise use the normal COMMON_ASM_OP.  */
4087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4088 && size > (unsigned int)ix86_section_threshold)
4089 fprintf (file, ".largecomm\t");
4091 fprintf (file, "%s", COMMON_ASM_OP);
4092 assemble_name (file, name);
/* The directive takes the alignment in bytes, so convert from bits.  */
4093 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4094 size, align / BITS_PER_UNIT);
4098 /* Utility function for targets to use in implementing
4099 ASM_OUTPUT_ALIGNED_BSS. */
4102 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4103 const char *name, unsigned HOST_WIDE_INT size,
/* Large medium-model objects are placed in .lbss; all others in .bss.  */
4106 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4107 && size > (unsigned int)ix86_section_threshold)
4108 switch_to_section (get_named_section (decl, ".lbss", 0));
4110 switch_to_section (bss_section);
4111 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4112 #ifdef ASM_DECLARE_OBJECT_NAME
4113 last_assemble_variable_decl = decl;
4114 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4116 /* Standard thing is just output label for the object. */
4117 ASM_OUTPUT_LABEL (file, name);
4118 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve at least one byte so the label refers to real storage.  */
4119 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific optimization defaults for the given -O LEVEL.
   Values of 2 below mean "not yet decided": the real default depends on
   TARGET_64BIT, which is resolved later in override_options.  */
4123 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4125 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4126 make the problem with not enough registers even worse. */
4127 #ifdef INSN_SCHEDULING
4129 flag_schedule_insns = 0;
4133 /* The Darwin libraries never set errno, so we might as well
4134 avoid calling them when that's the only reason we would. */
4135 flag_errno_math = 0;
4137 /* The default values of these switches depend on the TARGET_64BIT
4138 that is not known at this moment. Mark these values with 2 and
4139 let the user override these. In case there is no command line option
4140 specifying them, we will set the defaults in override_options. */
4142 flag_omit_frame_pointer = 2;
4143 flag_pcc_struct_return = 2;
4144 flag_asynchronous_unwind_tables = 2;
4145 flag_vect_cost_model = 1;
4146 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4147 SUBTARGET_OPTIMIZATION_OPTIONS;
4151 /* Decide whether we can make a sibling call to a function. DECL is the
4152 declaration of the function being targeted by the call and EXP is the
4153 CALL_EXPR representing the call. */
4156 ix86_function_ok_for_sibcall (tree decl, tree exp)
4161 /* If we are generating position-independent code, we cannot sibcall
4162 optimize any indirect call, or a direct call to a global function,
4163 as the PLT requires %ebx be live. */
4164 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* EXP's callee is a pointer expression; unwrap to the function type.  */
4171 func = TREE_TYPE (CALL_EXPR_FN (exp));
4172 if (POINTER_TYPE_P (func))
4173 func = TREE_TYPE (func);
4176 /* Check that the return value locations are the same. Like
4177 if we are returning floats on the 80387 register stack, we cannot
4178 make a sibcall from a function that doesn't return a float to a
4179 function that does or, conversely, from a function that does return
4180 a float to a function that doesn't; the necessary stack adjustment
4181 would not be executed. This is also the place we notice
4182 differences in the return value ABI. Note that it is ok for one
4183 of the functions to have void return type as long as the return
4184 value of the other is passed in a register. */
4185 a = ix86_function_value (TREE_TYPE (exp), func, false);
4186 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4188 if (STACK_REG_P (a) || STACK_REG_P (b))
4190 if (!rtx_equal_p (a, b))
4193 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4195 else if (!rtx_equal_p (a, b))
4198 /* If this call is indirect, we'll need to be able to use a call-clobbered
4199 register for the address of the target function. Make sure that all
4200 such registers are not used for passing parameters. */
4201 if (!decl && !TARGET_64BIT)
4205 /* We're looking at the CALL_EXPR, we need the type of the function. */
4206 type = CALL_EXPR_FN (exp); /* pointer expression */
4207 type = TREE_TYPE (type); /* pointer type */
4208 type = TREE_TYPE (type); /* function type */
/* regparm >= 3 would consume all call-clobbered integer registers,
   leaving none free to hold the indirect call address.  */
4210 if (ix86_function_regparm (type, NULL) >= 3)
4212 /* ??? Need to count the actual number of registers to be used,
4213 not the possible number of registers. Fix later. */
4218 /* Dllimport'd functions are also called indirectly. */
4219 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
4221 && decl && DECL_DLLIMPORT_P (decl)
4222 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
4225 /* If we need to align the outgoing stack, then sibcalling would
4226 unalign the stack, which may break the called function. */
4227 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4230 /* Otherwise okay. That also includes certain types of indirect calls. */
4234 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4235 calling convention attributes;
4236 arguments as in struct attribute_spec.handler. */
4239 ix86_handle_cconv_attribute (tree *node, tree name,
4241 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that isn't a function type or a
   decl that can carry one.  */
4244 if (TREE_CODE (*node) != FUNCTION_TYPE
4245 && TREE_CODE (*node) != METHOD_TYPE
4246 && TREE_CODE (*node) != FIELD_DECL
4247 && TREE_CODE (*node) != TYPE_DECL)
4249 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4250 IDENTIFIER_POINTER (name));
4251 *no_add_attrs = true;
4255 /* Can combine regparm with all attributes but fastcall. */
4256 if (is_attribute_p ("regparm", name))
4260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4262 error ("fastcall and regparm attributes are not compatible");
/* regparm takes one argument: an integer constant register count,
   which must not exceed REGPARM_MAX.  */
4265 cst = TREE_VALUE (args);
4266 if (TREE_CODE (cst) != INTEGER_CST)
4268 warning (OPT_Wattributes,
4269 "%qs attribute requires an integer constant argument",
4270 IDENTIFIER_POINTER (name));
4271 *no_add_attrs = true;
4273 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4275 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4276 IDENTIFIER_POINTER (name), REGPARM_MAX);
4277 *no_add_attrs = true;
4285 /* Do not warn when emulating the MS ABI. */
4286 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4287 warning (OPT_Wattributes, "%qs attribute ignored",
4288 IDENTIFIER_POINTER (name));
4289 *no_add_attrs = true;
4293 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4294 if (is_attribute_p ("fastcall", name))
4296 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298 error ("fastcall and cdecl attributes are not compatible");
4300 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4302 error ("fastcall and stdcall attributes are not compatible");
4304 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4306 error ("fastcall and regparm attributes are not compatible");
4310 /* Can combine stdcall with fastcall (redundant), regparm and
4312 else if (is_attribute_p ("stdcall", name))
4314 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4316 error ("stdcall and cdecl attributes are not compatible");
4318 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4320 error ("stdcall and fastcall attributes are not compatible");
4324 /* Can combine cdecl with regparm and sseregparm. */
4325 else if (is_attribute_p ("cdecl", name))
4327 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4329 error ("stdcall and cdecl attributes are not compatible");
4331 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4333 error ("fastcall and cdecl attributes are not compatible");
4337 /* Can combine sseregparm with all attributes. */
4342 /* Return 0 if the attributes for two types are incompatible, 1 if they
4343 are compatible, and 2 if they are nearly compatible (which causes a
4344 warning to be generated). */
4347 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4349 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default convention is stdcall, so the attribute that
   deviates from the default is "cdecl"; otherwise it is "stdcall".  */
4350 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Attribute checks below only make sense for function/method types.  */
4352 if (TREE_CODE (type1) != FUNCTION_TYPE
4353 && TREE_CODE (type1) != METHOD_TYPE)
4356 /* Check for mismatched fastcall/regparm types. */
4357 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4358 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4359 || (ix86_function_regparm (type1, NULL)
4360 != ix86_function_regparm (type2, NULL)))
4363 /* Check for mismatched sseregparm types. */
4364 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4365 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4368 /* Check for mismatched return types (cdecl vs stdcall). */
4369 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4370 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4376 /* Return the regparm value for a function with the indicated TYPE and DECL.
4377 DECL may be NULL when calling function indirectly
4378 or considering a libcall. */
4381 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued is static so the nested-function diagnostic below is
   emitted at most once per compilation.  */
4386 static bool error_issued;
/* 64-bit: register count is fixed by the function's ABI.  */
4389 return (ix86_function_type_abi (type) == SYSV_ABI
4390 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
/* 32-bit: start from -mregparm, then let an explicit attribute win.  */
4392 regparm = ix86_regparm;
4393 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4397 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4399 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4401 /* We can't use regparm(3) for nested functions because
4402 these pass static chain pointer in %ecx register. */
4403 if (!error_issued && regparm == 3
4404 && decl_function_context (decl)
4405 && !DECL_NO_STATIC_CHAIN (decl))
4407 error ("nested functions are limited to 2 register parameters");
4408 error_issued = true;
/* fastcall implies exactly 2 register parameters (%ecx, %edx).  */
4416 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4419 /* Use register calling convention for local functions when possible. */
4421 && TREE_CODE (decl) == FUNCTION_DECL
4425 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4426 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4429 int local_regparm, globals = 0, regno;
4432 /* Make sure no regparm register is taken by a
4433 fixed register variable. */
4434 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4435 if (fixed_regs[local_regparm])
4438 /* We can't use regparm(3) for nested functions as these use
4439 static chain pointer in third argument. */
4440 if (local_regparm == 3
4441 && decl_function_context (decl)
4442 && !DECL_NO_STATIC_CHAIN (decl))
4445 /* If the function realigns its stackpointer, the prologue will
4446 clobber %ecx. If we've already generated code for the callee,
4447 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4448 scanning the attributes for the self-realigning property. */
4449 f = DECL_STRUCT_FUNCTION (decl);
4450 /* Since current internal arg pointer won't conflict with
4451 parameter passing regs, so no need to change stack
4452 realignment and adjust regparm number.
4454 Each fixed register usage increases register pressure,
4455 so less registers should be used for argument passing.
4456 This functionality can be overriden by an explicit
4458 for (regno = 0; regno <= DI_REG; regno++)
4459 if (fixed_regs[regno])
/* Reduce the local regparm count by the number of global (fixed)
   registers to keep register pressure manageable.  */
4463 = globals < local_regparm ? local_regparm - globals : 0;
4465 if (local_regparm > regparm)
4466 regparm = local_regparm;
4473 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4474 DFmode (2) arguments in SSE registers for a function with the
4475 indicated TYPE and DECL. DECL may be NULL when calling function
4476 indirectly or considering a libcall. Otherwise return 0. */
4479 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* This routine is only meaningful for the 32-bit ABI.  */
4481 gcc_assert (!TARGET_64BIT);
4483 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4484 by the sseregparm attribute. */
4485 if (TARGET_SSEREGPARM
4486 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* sseregparm without SSE support is a hard error (when WARN).  */
4493 error ("Calling %qD with attribute sseregparm without "
4494 "SSE/SSE2 enabled", decl);
4496 error ("Calling %qT with attribute sseregparm without "
4497 "SSE/SSE2 enabled", type);
4505 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4506 (and DFmode for SSE2) arguments in SSE registers. */
4507 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4509 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4510 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4512 return TARGET_SSE2 ? 2 : 1;
4518 /* Return true if EAX is live at the start of the function. Used by
4519 ix86_expand_prologue to determine if we need special help before
4520 calling allocate_stack_worker. */
4523 ix86_eax_live_at_start_p (void)
4525 /* Cheat. Don't bother working forward from ix86_function_regparm
4526 to the function type to whether an actual argument is located in
4527 eax. Instead just look at cfg info, which is still close enough
4528 to correct at this point. This gives false positives for broken
4529 functions that might use uninitialized data that happens to be
4530 allocated in eax, but who cares? */
/* Register 0 is AX on this target.  */
4531 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4534 /* Value is the number of bytes of arguments automatically
4535 popped when returning from a subroutine call.
4536 FUNDECL is the declaration node of the function (as a tree),
4537 FUNTYPE is the data type of the function (as a tree),
4538 or for a library call it is an identifier node for the subroutine name.
4539 SIZE is the number of bytes of arguments passed on the stack.
4541 On the 80386, the RTD insn may be used to pop them if the number
4542 of args is fixed, but if the number is variable then the caller
4543 must pop them all. RTD can't be used for library calls now
4544 because the library is compiled with the Unix compiler.
4545 Use of RTD is a selectable option, since it is incompatible with
4546 standard Unix calling sequences. If the option is not selected,
4547 the caller must always pop the args.
4549 The attribute stdcall is equivalent to RTD on a per module basis. */
4552 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4556 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies only to real functions, not library-call identifiers.  */
4560 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4562 /* Cdecl functions override -mrtd, and never pop the stack. */
4563 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4565 /* Stdcall and fastcall functions will pop the stack if not
4567 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4568 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
/* Variadic (stdarg) functions never pop; the caller must.  */
4571 if (rtd && ! stdarg_p (funtype))
4575 /* Lose any fake structure return argument if it is passed on the stack. */
4576 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4577 && !KEEP_AGGREGATE_RETURN_POINTER)
4579 int nregs = ix86_function_regparm (funtype, fundecl);
/* The callee pops the hidden struct-return pointer (one word).  */
4581 return GET_MODE_SIZE (Pmode);
4587 /* Argument support functions. */
4589 /* Return true when register may be used to pass function parameters. */
4591 ix86_function_arg_regno_p (int regno)
4594 const int *parm_regs;
/* 32-bit cases: integer regparm registers, plus MMX/SSE registers
   within their respective REGPARM_MAX windows when enabled.  */
4599 return (regno < REGPARM_MAX
4600 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4602 return (regno < REGPARM_MAX
4603 || (TARGET_MMX && MMX_REGNO_P (regno)
4604 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4605 || (TARGET_SSE && SSE_REGNO_P (regno)
4606 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4611 if (SSE_REGNO_P (regno) && TARGET_SSE)
4616 if (TARGET_SSE && SSE_REGNO_P (regno)
4617 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4621 /* TODO: The function should depend on current function ABI but
4622 builtins.c would need updating then. Therefore we use the
4625 /* RAX is used as hidden argument to va_arg functions. */
4626 if (ix86_abi == SYSV_ABI && regno == AX_REG)
/* 64-bit: check REGNO against the ABI's integer parameter register
   table (MS or SysV).  */
4629 if (ix86_abi == MS_ABI)
4630 parm_regs = x86_64_ms_abi_int_parameter_registers;
4632 parm_regs = x86_64_int_parameter_registers;
4633 for (i = 0; i < (ix86_abi == MS_ABI ? X64_REGPARM_MAX
4634 : X86_64_REGPARM_MAX); i++)
4635 if (regno == parm_regs[i])
4640 /* Return if we do not know how to pass TYPE solely in registers. */
4643 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
/* Defer to the generic size/padding test first.  */
4645 if (must_pass_in_stack_var_size_or_pad (mode, type))
4648 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4649 The layout_type routine is crafty and tries to trick us into passing
4650 currently unsupported vector types on the stack by using TImode. */
4651 return (!TARGET_64BIT && mode == TImode
4652 && type && TREE_CODE (type) != VECTOR_TYPE);
4655 /* It returns the size, in bytes, of the area reserved for arguments passed
4656 in registers for the function represented by fndecl dependent to the used
4659 ix86_reg_parm_stack_space (const_tree fndecl)
/* FNDECL may be either a FUNCTION_DECL or a bare function type;
   resolve the calling ABI accordingly.  */
4661 enum calling_abi call_abi = SYSV_ABI;
4662 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4663 call_abi = ix86_function_abi (fndecl);
4665 call_abi = ix86_function_type_abi (fndecl);
/* The MS ABI reserves a shadow area for register parameters.  */
4666 if (call_abi == MS_ABI)
4671 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4674 ix86_function_type_abi (const_tree fntype)
/* Start from the global default ABI; an explicit ms_abi/sysv_abi
   attribute on the type flips it.  */
4676 if (TARGET_64BIT && fntype != NULL)
4678 enum calling_abi abi = ix86_abi;
4679 if (abi == SYSV_ABI)
4681 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4684 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI (SYSV_ABI or MS_ABI) of function decl FNDECL,
   derived from its function type.  */
4691 static enum calling_abi
4692 ix86_function_abi (const_tree fndecl)
4696 return ix86_function_type_abi (TREE_TYPE (fndecl));
4699 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4702 ix86_cfun_abi (void)
/* Outside a 64-bit function context the per-function ABI is not
   tracked; the visible fallback path handles that case.  */
4704 if (! cfun || ! TARGET_64BIT)
4706 return cfun->machine->call_abi;
4710 extern void init_regs (void);
4712 /* Implementation of call abi switching target hook. Specific to FNDECL
4713 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4714 for more details. */
4716 ix86_call_abi_override (const_tree fndecl)
/* NULL means "use the compilation-wide default ABI".  */
4718 if (fndecl == NULL_TREE)
4719 cfun->machine->call_abi = ix86_abi;
4721 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4724 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4725 re-initialization of init_regs each time we switch function context since
4726 this is needed only during RTL expansion. */
4728 ix86_maybe_switch_abi (void)
/* SI is call-used under SysV but not MS; use it as a cheap probe for
   whether the register tables already match the current ABI.  */
4731 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4735 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4736 for a call to a function whose data type is FNTYPE.
4737 For a library call, FNTYPE is 0. */
4740 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4741 tree fntype, /* tree ptr for function decl */
4742 rtx libname, /* SYMBOL_REF of library name or 0 */
4745 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4746 memset (cum, 0, sizeof (*cum));
/* Resolve the call ABI from the decl when available, else the type.  */
4749 cum->call_abi = ix86_function_abi (fndecl);
4751 cum->call_abi = ix86_function_type_abi (fntype);
4752 /* Set up the number of registers to use for passing arguments. */
4754 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4755 sorry ("ms_abi attribute require -maccumulate-outgoing-args or subtarget optimization implying it");
4756 cum->nregs = ix86_regparm;
/* When the callee's ABI differs from the default, the integer and SSE
   register counts come from the callee's ABI, not the default one.  */
4759 if (cum->call_abi != ix86_abi)
4760 cum->nregs = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX
4765 cum->sse_nregs = SSE_REGPARM_MAX;
4768 if (cum->call_abi != ix86_abi)
4769 cum->sse_nregs = ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4770 : X64_SSE_REGPARM_MAX;
4774 cum->mmx_nregs = MMX_REGPARM_MAX;
/* Default to warning once about AVX/SSE/MMX ABI mismatches.  */
4775 cum->warn_avx = true;
4776 cum->warn_sse = true;
4777 cum->warn_mmx = true;
4779 /* Because type might mismatch in between caller and callee, we need to
4780 use actual type of function for local calls.
4781 FIXME: cgraph_analyze can be told to actually record if function uses
4782 va_start so for local functions maybe_vaarg can be made aggressive
4784 FIXME: once typesytem is fixed, we won't need this code anymore. */
4786 fntype = TREE_TYPE (fndecl);
4787 cum->maybe_vaarg = (fntype
4788 ? (!prototype_p (fntype) || stdarg_p (fntype))
4793 /* If there are variable arguments, then we won't pass anything
4794 in registers in 32-bit mode. */
4795 if (stdarg_p (fntype))
4806 /* Use ecx and edx registers if function has fastcall attribute,
4807 else look for regparm information. */
4810 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4816 cum->nregs = ix86_function_regparm (fntype, fndecl);
4819 /* Set up the number of SSE registers used for passing SFmode
4820 and DFmode arguments. Warn for mismatching ABI. */
4821 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4825 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4826 But in the case of vector types, it is some vector mode.
4828 When we have only some of our vector isa extensions enabled, then there
4829 are some modes for which vector_mode_supported_p is false. For these
4830 modes, the generic vector support in gcc will choose some non-vector mode
4831 in order to implement the type. By computing the natural mode, we'll
4832 select the proper ABI location for the operand and not depend on whatever
4833 the middle-end decides to do with these vector types.
4835 The midde-end can't deal with the vector types > 16 bytes. In this
4836 case, we return the original mode and warn ABI change if CUM isn't
4839 static enum machine_mode
4840 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4842 enum machine_mode mode = TYPE_MODE (type);
/* Only reconsider vector types whose TYPE_MODE was not already a
   vector mode (i.e. the ISA for them is disabled).  */
4844 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4846 HOST_WIDE_INT size = int_size_in_bytes (type);
4847 if ((size == 8 || size == 16 || size == 32)
4848 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4849 && TYPE_VECTOR_SUBPARTS (type) > 1)
4851 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Search vector modes of the right element class starting from the
   smallest one.  */
4853 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4854 mode = MIN_MODE_VECTOR_FLOAT;
4856 mode = MIN_MODE_VECTOR_INT;
4858 /* Get the mode which has this inner mode and number of units. */
4859 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4860 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4861 && GET_MODE_INNER (mode) == innermode)
/* 32-byte (AVX) vectors without AVX enabled: keep the original mode
   and warn about the ABI change (once).  */
4863 if (size == 32 && !TARGET_AVX)
4865 static bool warnedavx;
4872 warning (0, "AVX vector argument without AVX "
4873 "enabled changes the ABI");
4875 return TYPE_MODE (type);
4888 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4889 this may not agree with the mode that the type system has chosen for the
4890 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4891 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4894 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4899 if (orig_mode != BLKmode)
4900 tmp = gen_rtx_REG (orig_mode, regno)
/* BLKmode: wrap the register in a one-entry PARALLEL at offset 0.  */;
4903 tmp = gen_rtx_REG (mode, regno);
4904 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4905 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4911 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4912 of this code is to classify each 8bytes of incoming argument by the register
4913 class and assign registers accordingly. */
4915 /* Return the union class of CLASS1 and CLASS2.
4916 See the x86-64 PS ABI for details. */
4918 static enum x86_64_reg_class
4919 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4921 /* Rule #1: If both classes are equal, this is the resulting class. */
4922 if (class1 == class2)
4925 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4927 if (class1 == X86_64_NO_CLASS)
4929 if (class2 == X86_64_NO_CLASS)
4932 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4933 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4934 return X86_64_MEMORY_CLASS;
4936 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays INTEGERSI: both halves fit in 32 bits.  */
4937 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4938 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4939 return X86_64_INTEGERSI_CLASS;
4940 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4941 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4942 return X86_64_INTEGER_CLASS;
4944 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4946 if (class1 == X86_64_X87_CLASS
4947 || class1 == X86_64_X87UP_CLASS
4948 || class1 == X86_64_COMPLEX_X87_CLASS
4949 || class2 == X86_64_X87_CLASS
4950 || class2 == X86_64_X87UP_CLASS
4951 || class2 == X86_64_COMPLEX_X87_CLASS)
4952 return X86_64_MEMORY_CLASS;
4954 /* Rule #6: Otherwise class SSE is used. */
4955 return X86_64_SSE_CLASS;
4958 /* Classify the argument of type TYPE and mode MODE.
4959 CLASSES will be filled by the register class used to pass each word
4960 of the operand. The number of words is returned. In case the parameter
4961 should be passed in memory, 0 is returned. As a special case for zero
4962 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4964 BIT_OFFSET is used internally for handling records and specifies offset
4965 of the offset in bits modulo 256 to avoid overflow cases.
4967 See the x86-64 PS ABI for details.
4971 classify_argument (enum machine_mode mode, const_tree type,
4972 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4974 HOST_WIDE_INT bytes =
4975 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Number of 8-byte words the argument occupies, accounting for the
   starting bit offset within its first word.  */
4976 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4978 /* Variable sized entities are always passed/returned in memory. */
4982 if (mode != VOIDmode
4983 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates: classify recursively, field by field.  */
4986 if (type && AGGREGATE_TYPE_P (type))
4990 enum x86_64_reg_class subclasses[MAX_CLASSES];
4992 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4996 for (i = 0; i < words; i++)
4997 classes[i] = X86_64_NO_CLASS;
4999 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5000 signalize memory class, so handle it as special case. */
5003 classes[0] = X86_64_NO_CLASS;
5007 /* Classify each field of record and merge classes. */
5008 switch (TREE_CODE (type))
5011 /* And now merge the fields of structure. */
5012 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5014 if (TREE_CODE (field) == FIELD_DECL)
5018 if (TREE_TYPE (field) == error_mark_node)
5021 /* Bitfields are always classified as integer. Handle them
5022 early, since later code would consider them to be
5023 misaligned integers. */
5024 if (DECL_BIT_FIELD (field))
/* Mark every word the bitfield overlaps as INTEGER.  */
5026 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5027 i < ((int_bit_position (field) + (bit_offset % 64))
5028 + tree_low_cst (DECL_SIZE (field), 0)
5031 merge_classes (X86_64_INTEGER_CLASS,
5038 type = TREE_TYPE (field);
5040 /* Flexible array member is ignored. */
5041 if (TYPE_MODE (type) == BLKmode
5042 && TREE_CODE (type) == ARRAY_TYPE
5043 && TYPE_SIZE (type) == NULL_TREE
5044 && TYPE_DOMAIN (type) != NULL_TREE
5045 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5050 if (!warned && warn_psabi)
5053 inform (input_location,
5054 "The ABI of passing struct with"
5055 " a flexible array member has"
5056 " changed in GCC 4.4");
/* Classify the field recursively, then merge its per-word classes
   into the enclosing record at the field's word position.  */
5060 num = classify_argument (TYPE_MODE (type), type,
5062 (int_bit_position (field)
5063 + bit_offset) % 256);
5066 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5067 for (i = 0; i < num && (i + pos) < words; i++)
5069 merge_classes (subclasses[i], classes[i + pos]);
5076 /* Arrays are handled as small records. */
5079 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5080 TREE_TYPE (type), subclasses, bit_offset);
5084 /* The partial classes are now full classes. */
5085 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5086 subclasses[0] = X86_64_SSE_CLASS;
5087 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5088 && !((bit_offset % 64) == 0 && bytes == 4))
5089 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification over all array words.  */
5091 for (i = 0; i < words; i++)
5092 classes[i] = subclasses[i % num];
5097 case QUAL_UNION_TYPE:
5098 /* Unions are similar to RECORD_TYPE but offset is always 0.
5100 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5102 if (TREE_CODE (field) == FIELD_DECL)
5106 if (TREE_TYPE (field) == error_mark_node)
5109 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5110 TREE_TYPE (field), subclasses,
5114 for (i = 0; i < num; i++)
5115 classes[i] = merge_classes (subclasses[i], classes[i]);
5126 /* When size > 16 bytes, if the first one isn't
5127 X86_64_SSE_CLASS or any other ones aren't
5128 X86_64_SSEUP_CLASS, everything should be passed in
5130 if (classes[0] != X86_64_SSE_CLASS)
5133 for (i = 1; i < words; i++)
5134 if (classes[i] != X86_64_SSEUP_CLASS)
5138 /* Final merger cleanup. */
5139 for (i = 0; i < words; i++)
5141 /* If one class is MEMORY, everything should be passed in
5143 if (classes[i] == X86_64_MEMORY_CLASS)
5146 /* The X86_64_SSEUP_CLASS should be always preceded by
5147 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5148 if (classes[i] == X86_64_SSEUP_CLASS
5149 && classes[i - 1] != X86_64_SSE_CLASS
5150 && classes[i - 1] != X86_64_SSEUP_CLASS)
5152 /* The first one should never be X86_64_SSEUP_CLASS. */
5153 gcc_assert (i != 0);
5154 classes[i] = X86_64_SSE_CLASS;
5157 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5158 everything should be passed in memory. */
5159 if (classes[i] == X86_64_X87UP_CLASS
5160 && (classes[i - 1] != X86_64_X87_CLASS))
5164 /* The first one should never be X86_64_X87UP_CLASS. */
5165 gcc_assert (i != 0);
5166 if (!warned && warn_psabi)
5169 inform (input_location,
5170 "The ABI of passing union with long double"
5171 " has changed in GCC 4.4");
5179 /* Compute alignment needed. We align all types to natural boundaries with
5180 exception of XFmode that is aligned to 64bits. */
5181 if (mode != VOIDmode && mode != BLKmode)
5183 int mode_alignment = GET_MODE_BITSIZE (mode);
5186 mode_alignment = 128;
5187 else if (mode == XCmode)
5188 mode_alignment = 256;
5189 if (COMPLEX_MODE_P (mode))
5190 mode_alignment /= 2;
5191 /* Misaligned fields are always returned in memory. */
5192 if (bit_offset % mode_alignment)
5196 /* for V1xx modes, just use the base mode */
5197 if (VECTOR_MODE_P (mode) && mode != V1DImode
5198 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5199 mode = GET_MODE_INNER (mode);
5201 /* Classification of atomic types. */
5206 classes[0] = X86_64_SSE_CLASS;
5209 classes[0] = X86_64_SSE_CLASS;
5210 classes[1] = X86_64_SSEUP_CLASS;
/* Integer modes: class depends on where the value falls relative to
   32/64-bit word boundaries, given the starting bit offset.  */
5220 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5224 classes[0] = X86_64_INTEGERSI_CLASS;
5227 else if (size <= 64)
5229 classes[0] = X86_64_INTEGER_CLASS;
5232 else if (size <= 64+32)
5234 classes[0] = X86_64_INTEGER_CLASS;
5235 classes[1] = X86_64_INTEGERSI_CLASS;
5238 else if (size <= 64+64)
5240 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5248 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5252 /* OImode shouldn't be used directly. */
/* Scalar float modes.  */
5257 if (!(bit_offset % 64))
5258 classes[0] = X86_64_SSESF_CLASS;
5260 classes[0] = X86_64_SSE_CLASS;
5263 classes[0] = X86_64_SSEDF_CLASS;
5266 classes[0] = X86_64_X87_CLASS;
5267 classes[1] = X86_64_X87UP_CLASS;
5270 classes[0] = X86_64_SSE_CLASS;
5271 classes[1] = X86_64_SSEUP_CLASS;
/* Complex float modes.  */
5274 classes[0] = X86_64_SSE_CLASS;
5275 if (!(bit_offset % 64))
5281 if (!warned && warn_psabi)
5284 inform (input_location,
5285 "The ABI of passing structure with complex float"
5286 " member has changed in GCC 4.4");
5288 classes[1] = X86_64_SSESF_CLASS;
5292 classes[0] = X86_64_SSEDF_CLASS;
5293 classes[1] = X86_64_SSEDF_CLASS;
5296 classes[0] = X86_64_COMPLEX_X87_CLASS;
5299 /* This modes is larger than 16 bytes. */
/* Vector modes: 32-byte vectors use SSE + three SSEUP words.  */
5307 classes[0] = X86_64_SSE_CLASS;
5308 classes[1] = X86_64_SSEUP_CLASS;
5309 classes[2] = X86_64_SSEUP_CLASS;
5310 classes[3] = X86_64_SSEUP_CLASS;
5318 classes[0] = X86_64_SSE_CLASS;
5319 classes[1] = X86_64_SSEUP_CLASS;
5326 classes[0] = X86_64_SSE_CLASS;
5332 gcc_assert (VECTOR_MODE_P (mode));
5337 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5339 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5340 classes[0] = X86_64_INTEGERSI_CLASS;
5342 classes[0] = X86_64_INTEGER_CLASS;
5343 classes[1] = X86_64_INTEGER_CLASS;
/* Two words are used when the value spans more than 8 bytes.  */
5344 return 1 + (bytes > 8);
/* NOTE(review): this chunk is a lossy extraction -- original line numbers
   are fused into each line and interior lines are missing; comments below
   describe only the visible fragments.  */
5348 /* Examine the argument and return set number of register required in each
5349 class. Return 0 iff parameter should be passed in memory. */
5351 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5352 int *int_nregs, int *sse_nregs)
5354 enum x86_64_reg_class regclass[MAX_CLASSES];
5355 int n = classify_argument (mode, type, regclass, 0);
/* Walk the eightbyte classes backwards, tallying integer vs. SSE register
   needs (the actual *int_nregs/*sse_nregs increments fall in the missing
   lines -- TODO confirm against the full source).  */
5361 for (n--; n >= 0; n--)
5362 switch (regclass[n])
5364 case X86_64_INTEGER_CLASS:
5365 case X86_64_INTEGERSI_CLASS:
5368 case X86_64_SSE_CLASS:
5369 case X86_64_SSESF_CLASS:
5370 case X86_64_SSEDF_CLASS:
5373 case X86_64_NO_CLASS:
5374 case X86_64_SSEUP_CLASS:
5376 case X86_64_X87_CLASS:
5377 case X86_64_X87UP_CLASS:
/* x87/complex-x87 values can only come back in registers as a return
   value; as an argument they force memory passing (return 0).  */
5381 case X86_64_COMPLEX_X87_CLASS:
5382 return in_return ? 2 : 0;
5383 case X86_64_MEMORY_CLASS:
/* NOTE(review): lossy extraction -- embedded line numbers and gaps in the
   numbering indicate missing lines throughout this function.  */
5389 /* Construct container for the argument used by GCC interface. See
5390 FUNCTION_ARG for the detailed description. */
5393 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5394 const_tree type, int in_return, int nintregs, int nsseregs,
5395 const int *intreg, int sse_regno)
5397 /* The following variables hold the static issued_error state. */
/* Static so each SSE/x87 misconfiguration diagnostic is emitted once per
   compilation, not once per argument.  */
5398 static bool issued_sse_arg_error;
5399 static bool issued_sse_ret_error;
5400 static bool issued_x87_ret_error;
5402 enum machine_mode tmpmode;
5404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5405 enum x86_64_reg_class regclass[MAX_CLASSES];
5409 int needed_sseregs, needed_intregs;
5410 rtx exp[MAX_CLASSES];
5413 n = classify_argument (mode, type, regclass, 0);
5416 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required kinds: pass in memory.  */
5419 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5422 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5423 some less clueful developer tries to use floating-point anyway. */
5424 if (needed_sseregs && !TARGET_SSE)
5428 if (!issued_sse_ret_error)
5430 error ("SSE register return with SSE disabled");
5431 issued_sse_ret_error = true;
5434 else if (!issued_sse_arg_error)
5436 error ("SSE register argument with SSE disabled");
5437 issued_sse_arg_error = true;
5442 /* Likewise, error if the ABI requires us to return values in the
5443 x87 registers and the user specified -mno-80387. */
5444 if (!TARGET_80387 && in_return)
5445 for (i = 0; i < n; i++)
5446 if (regclass[i] == X86_64_X87_CLASS
5447 || regclass[i] == X86_64_X87UP_CLASS
5448 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5450 if (!issued_x87_ret_error)
5452 error ("x87 register return with x87 disabled");
5453 issued_x87_ret_error = true;
5458 /* First construct simple cases. Avoid SCmode, since we want to use
5459 single register to pass this type. */
5460 if (n == 1 && mode != SCmode)
5461 switch (regclass[0])
5463 case X86_64_INTEGER_CLASS:
5464 case X86_64_INTEGERSI_CLASS:
5465 return gen_rtx_REG (mode, intreg[0]);
5466 case X86_64_SSE_CLASS:
5467 case X86_64_SSESF_CLASS:
5468 case X86_64_SSEDF_CLASS:
5469 if (mode != BLKmode)
5470 return gen_reg_or_parallel (mode, orig_mode,
5471 SSE_REGNO (sse_regno));
5473 case X86_64_X87_CLASS:
5474 case X86_64_COMPLEX_X87_CLASS:
5475 return gen_rtx_REG (mode, FIRST_STACK_REG);
5476 case X86_64_NO_CLASS:
5477 /* Zero sized array, struct or class. */
/* Two-register SSE pair (e.g. a 16-byte vector): a single SSE reg REG
   suffices when the mode is not BLKmode.  */
5482 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5483 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5484 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5486 && regclass[0] == X86_64_SSE_CLASS
5487 && regclass[1] == X86_64_SSEUP_CLASS
5488 && regclass[2] == X86_64_SSEUP_CLASS
5489 && regclass[3] == X86_64_SSEUP_CLASS
5491 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5494 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5495 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* 16-byte integer-class values in a consecutive register pair.  */
5496 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5497 && regclass[1] == X86_64_INTEGER_CLASS
5498 && (mode == CDImode || mode == TImode || mode == TFmode)
5499 && intreg[0] + 1 == intreg[1])
5500 return gen_rtx_REG (mode, intreg[0]);
5502 /* Otherwise figure out the entries of the PARALLEL. */
5503 for (i = 0; i < n; i++)
5507 switch (regclass[i])
5509 case X86_64_NO_CLASS:
5511 case X86_64_INTEGER_CLASS:
5512 case X86_64_INTEGERSI_CLASS:
5513 /* Merge TImodes on aligned occasions here too. */
5514 if (i * 8 + 8 > bytes)
5515 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5516 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5520 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5521 if (tmpmode == BLKmode)
5523 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5524 gen_rtx_REG (tmpmode, *intreg),
5528 case X86_64_SSESF_CLASS:
5529 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5530 gen_rtx_REG (SFmode,
5531 SSE_REGNO (sse_regno)),
5535 case X86_64_SSEDF_CLASS:
5536 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5537 gen_rtx_REG (DFmode,
5538 SSE_REGNO (sse_regno)),
5542 case X86_64_SSE_CLASS:
5550 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5560 && regclass[1] == X86_64_SSEUP_CLASS
5561 && regclass[2] == X86_64_SSEUP_CLASS
5562 && regclass[3] == X86_64_SSEUP_CLASS);
5569 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5570 gen_rtx_REG (tmpmode,
5571 SSE_REGNO (sse_regno)),
5580 /* Empty aligned struct, union or class. */
/* Assemble the PARALLEL from the collected EXPR_LISTs.  */
5584 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5585 for (i = 0; i < nexps; i++)
5586 XVECEXP (ret, 0, i) = exp [i];
/* NOTE(review): fragmentary extraction; the mode dispatch that selects
   between the integer/SSE/MMX branches below is among the missing lines.  */
5590 /* Update the data in CUM to advance over an argument of mode MODE
5591 and data type TYPE. (TYPE is null for libcalls where that information
5592 may not be available.) */
5595 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5596 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register branch: consume WORDS general registers.  */
5612 cum->words += words;
5613 cum->nregs -= words;
5614 cum->regno += words;
5616 if (cum->nregs <= 0)
5624 /* OImode shouldn't be used directly. */
5628 if (cum->float_in_sse < 2)
5631 if (cum->float_in_sse < 1)
/* SSE branch: aggregates never go in SSE regs here; consume one XMM reg.  */
5648 if (!type || !AGGREGATE_TYPE_P (type))
5650 cum->sse_words += words;
5651 cum->sse_nregs -= 1;
5652 cum->sse_regno += 1;
5653 if (cum->sse_nregs <= 0)
/* MMX branch: likewise consume one MMX register for non-aggregates.  */
5666 if (!type || !AGGREGATE_TYPE_P (type))
5668 cum->mmx_words += words;
5669 cum->mmx_nregs -= 1;
5670 cum->mmx_regno += 1;
5671 if (cum->mmx_nregs <= 0)
/* Advance CUM past one SysV x86-64 argument: either consume the
   integer/SSE registers the argument needs, or account for stack words.
   (NOTE(review): lossy extraction; some lines are missing.)  */
5682 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5683 tree type, HOST_WIDE_INT words, int named)
5685 int int_nregs, sse_nregs;
5687 /* Unnamed 256bit vector mode parameters are passed on stack. */
5688 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returning 0 means "pass in memory".  */
5691 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5692 cum->words += words;
5693 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5695 cum->nregs -= int_nregs;
5696 cum->sse_nregs -= sse_nregs;
5697 cum->regno += int_nregs;
5698 cum->sse_regno += sse_nregs;
5701 cum->words += words;
/* Advance CUM for the Microsoft x64 ABI, where every by-value argument
   occupies exactly one slot of 1/2/4/8 bytes (anything else is passed
   indirectly).  NOTE(review): register bookkeeping lines appear to be
   missing from this extraction.  */
5705 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5706 HOST_WIDE_INT words)
5708 /* Otherwise, this should be passed indirect. */
5709 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5711 cum->words += words;
/* Top-level argument-advance dispatcher: compute the argument's size in
   bytes/words, then delegate to the MS-x64, SysV-x64 or 32-bit helper
   according to target and call ABI.  */
5720 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5721 tree type, int named)
5723 HOST_WIDE_INT bytes, words;
5725 if (mode == BLKmode)
5726 bytes = int_size_in_bytes (type);
5728 bytes = GET_MODE_SIZE (mode);
/* Round the byte size up to whole words.  */
5729 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5732 mode = type_natural_mode (type, NULL);
5734 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5735 function_arg_advance_ms_64 (cum, bytes, words);
5736 else if (TARGET_64BIT)
5737 function_arg_advance_64 (cum, mode, type, words, named);
5739 function_arg_advance_32 (cum, mode, type, bytes, words);
/* NOTE(review): fragmentary extraction -- the mode switch driving the
   integer/SSE/MMX branches below is among the missing lines.  */
5742 /* Define where to put the arguments to a function.
5743 Value is zero to push the argument on the stack,
5744 or a hard register in which to store the argument.
5746 MODE is the argument's machine mode.
5747 TYPE is the data type of the argument (as a tree).
5748 This is null for libcalls where that information may
5750 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5751 the preceding args and about the function being called.
5752 NAMED is nonzero if this argument is a named parameter
5753 (otherwise it is an extra parameter matching an ellipsis). */
5756 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5757 enum machine_mode orig_mode, tree type,
5758 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning latches for the SSE/MMX-disabled diagnostics below.  */
5760 static bool warnedsse, warnedmmx;
5762 /* Avoid the AL settings for the Unix64 ABI. */
5763 if (mode == VOIDmode)
5779 if (words <= cum->nregs)
5781 int regno = cum->regno;
5783 /* Fastcall allocates the first two DWORD (SImode) or
5784 smaller arguments to ECX and EDX if it isn't an
5790 || (type && AGGREGATE_TYPE_P (type)))
5793 /* ECX not EAX is the first allocated register. */
5794 if (regno == AX_REG)
5797 return gen_rtx_REG (mode, regno);
5802 if (cum->float_in_sse < 2)
5805 if (cum->float_in_sse < 1)
5809 /* In 32bit, we pass TImode in xmm registers. */
5816 if (!type || !AGGREGATE_TYPE_P (type))
5818 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5821 warning (0, "SSE vector argument without SSE enabled "
5825 return gen_reg_or_parallel (mode, orig_mode,
5826 cum->sse_regno + FIRST_SSE_REG)
5831 /* OImode shouldn't be used directly. */
5840 if (!type || !AGGREGATE_TYPE_P (type))
5843 return gen_reg_or_parallel (mode, orig_mode,
5844 cum->sse_regno + FIRST_SSE_REG);
5853 if (!type || !AGGREGATE_TYPE_P (type))
5855 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5858 warning (0, "MMX vector argument without MMX enabled "
5862 return gen_reg_or_parallel (mode, orig_mode,
5863 cum->mmx_regno + FIRST_MMX_REG);
/* Pick the register (or PARALLEL) for one SysV x86-64 argument.
   NOTE(review): lossy extraction; several lines are missing.  */
5872 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5873 enum machine_mode orig_mode, tree type, int named)
5875 /* Handle a hidden AL argument containing number of registers
5876 for varargs x86-64 functions. */
5877 if (mode == VOIDmode)
5878 return GEN_INT (cum->maybe_vaarg
5879 ? (cum->sse_nregs < 0
5880 ? (cum->call_abi == ix86_abi
5882 : (ix86_abi != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5883 : X64_SSE_REGPARM_MAX))
5898 /* Unnamed 256bit vector mode parameters are passed on stack. */
/* Delegate the real work of building the register container.  */
5904 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5906 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for one Microsoft x64 ABI argument.
   NOTE(review): lossy extraction; some lines are missing.  */
5911 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5912 enum machine_mode orig_mode, int named,
5913 HOST_WIDE_INT bytes)
5917 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5918 We use value of -2 to specify that current function call is MSABI. */
5919 if (mode == VOIDmode)
5920 return GEN_INT (-2);
5922 /* If we've run out of registers, it goes on the stack. */
5923 if (cum->nregs == 0)
5926 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5928 /* Only floating point modes are passed in anything but integer regs. */
5929 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5932 regno = cum->regno + FIRST_SSE_REG;
5937 /* Unnamed floating parameters are passed in both the
5938 SSE and integer registers. */
5939 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5940 t2 = gen_rtx_REG (mode, regno);
5941 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5942 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5943 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5946 /* Handle aggregated types passed in register. */
5947 if (orig_mode == BLKmode)
/* BLKmode aggregates of 1..8 bytes are retyped to SImode/DImode.  */
5949 if (bytes > 0 && bytes <= 8)
5950 mode = (bytes > 4 ? DImode : SImode);
5951 if (mode == BLKmode)
5955 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: compute size, normalize vector
   types to their natural mode, then delegate per target/call ABI.  */
5959 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5960 tree type, int named)
5962 enum machine_mode mode = omode;
5963 HOST_WIDE_INT bytes, words;
5965 if (mode == BLKmode)
5966 bytes = int_size_in_bytes (type);
5968 bytes = GET_MODE_SIZE (mode);
5969 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5971 /* To simplify the code below, represent vector types with a vector mode
5972 even if MMX/SSE are not active. */
5973 if (type && TREE_CODE (type) == VECTOR_TYPE)
5974 mode = type_natural_mode (type, cum);
5976 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5977 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5978 else if (TARGET_64BIT)
5979 return function_arg_64 (cum, mode, omode, type, named);
5981 return function_arg_32 (cum, mode, omode, type, bytes, words);
5984 /* A C expression that indicates when an argument must be passed by
5985 reference. If nonzero for an argument, a copy of that argument is
5986 made in memory and a pointer to the argument is passed instead of
5987 the argument itself. The pointer is passed in whatever way is
5988 appropriate for passing a pointer to that type. */
5991 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5992 enum machine_mode mode ATTRIBUTE_UNUSED,
5993 const_tree type, bool named ATTRIBUTE_UNUSED)
5995 /* See Windows x64 Software Convention. */
5996 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5998 int msize = (int) GET_MODE_SIZE (mode);
6001 /* Arrays are passed by reference. */
6002 if (TREE_CODE (type) == ARRAY_TYPE)
6005 if (AGGREGATE_TYPE_P (type))
6007 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6008 are passed by reference. */
6009 msize = int_size_in_bytes (type);
6013 /* __m128 is passed by reference. */
/* The listed sizes (1, 2, 4, 8 bytes) are the only by-value sizes on
   MS x64; everything else goes by reference.  */
6015 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (int_size_in_bytes == -1) are
   passed by reference.  */
6021 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6027 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): lossy extraction; parts of the predicate are missing.  */
6030 contains_aligned_value_p (tree type)
6032 enum machine_mode mode = TYPE_MODE (type);
6033 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6037 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6039 if (TYPE_ALIGN (type) < 128)
6042 if (AGGREGATE_TYPE_P (type))
6044 /* Walk the aggregates recursively. */
6045 switch (TREE_CODE (type))
6049 case QUAL_UNION_TYPE:
6053 /* Walk all the structure fields. */
6054 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6056 if (TREE_CODE (field) == FIELD_DECL
6057 && contains_aligned_value_p (TREE_TYPE (field)))
6064 /* Just for use if some languages passes arrays by value. */
/* Array case: recurse into the element type.  */
6065 if (contains_aligned_value_p (TREE_TYPE (type)))
6076 /* Gives the alignment boundary, in bits, of an argument with the
6077 specified mode and type. */
6080 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6085 /* Since canonical type is used for call, we convert it to
6086 canonical type if needed. */
6087 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6088 type = TYPE_CANONICAL (type);
6089 align = TYPE_ALIGN (type);
/* No type: fall back to the mode's natural alignment.  */
6092 align = GET_MODE_ALIGNMENT (mode);
6093 if (align < PARM_BOUNDARY)
6094 align = PARM_BOUNDARY;
6095 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6096 natural boundaries. */
6097 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6099 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6100 make an exception for SSE modes since these require 128bit
6103 The handling here differs from field_alignment. ICC aligns MMX
6104 arguments to 4 byte boundaries, while structure fields are aligned
6105 to 8 byte boundaries. */
6108 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6109 align = PARM_BOUNDARY;
6113 if (!contains_aligned_value_p (type))
6114 align = PARM_BOUNDARY;
/* Never exceed the target's maximum supported alignment.  */
6117 if (align > BIGGEST_ALIGNMENT)
6118 align = BIGGEST_ALIGNMENT;
6122 /* Return true if N is a possible register number of function value. */
/* NOTE(review): lossy extraction; the switch/other cases are missing.  */
6125 ix86_function_value_regno_p (int regno)
6132 case FIRST_FLOAT_REG:
6133 /* TODO: The function should depend on current function ABI but
6134 builtins.c would need updating then. Therefore we use the
/* MS x64 never returns in x87 %st(0).  */
6136 if (TARGET_64BIT && ix86_abi == MS_ABI)
6138 return TARGET_FLOAT_RETURNS_IN_80387;
6144 if (TARGET_MACHO || TARGET_64BIT)
6152 /* Define how to find the value returned by a function.
6153 VALTYPE is the data type of the value (as a tree).
6154 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6155 otherwise, FUNC is 0. */
6158 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6159 const_tree fntype, const_tree fn)
6163 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6164 we normally prevent this case when mmx is not available. However
6165 some ABIs may require the result to be returned like DImode. */
6166 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6167 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6169 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6170 we prevent this case when sse is not available. However some ABIs
6171 may require the result to be returned like integer TImode. */
6172 else if (mode == TImode
6173 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6174 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6176 /* 32-byte vector modes in %ymm0. */
6177 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6178 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6180 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6181 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6182 regno = FIRST_FLOAT_REG;
6184 /* Most things go in %eax. */
6187 /* Override FP return register with %xmm0 for local functions when
6188 SSE math is enabled or for functions with sseregparm attribute. */
6189 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6191 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6192 if ((sse_level >= 1 && mode == SFmode)
6193 || (sse_level == 2 && mode == DFmode))
6194 regno = FIRST_SSE_REG;
6197 /* OImode shouldn't be used directly. */
6198 gcc_assert (mode != OImode);
6200 return gen_rtx_REG (orig_mode, regno);
/* Locate the SysV x86-64 return value for MODE/VALTYPE.
   NOTE(review): lossy extraction; the libcall mode switch is partially
   missing.  */
6204 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6209 /* Handle libcalls, which don't provide a type node. */
6210 if (valtype == NULL)
6222 return gen_rtx_REG (mode, FIRST_SSE_REG);
6225 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6229 return gen_rtx_REG (mode, AX_REG);
/* Typed values: build the full register container per the ABI
   classification.  */
6233 ret = construct_container (mode, orig_mode, valtype, 1,
6234 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6235 x86_64_int_return_registers, 0);
6237 /* For zero sized structures, construct_container returns NULL, but we
6238 need to keep rest of compiler happy by returning meaningful value. */
6240 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Locate the Microsoft x64 ABI return register: %rax by default, %xmm0
   for 16-byte non-complex int/vector values and for SF/DFmode floats.  */
6246 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6248 unsigned int regno = AX_REG;
6252 switch (GET_MODE_SIZE (mode))
/* 16-byte case (e.g. __m128): returned in %xmm0.  */
6255 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6256 && !COMPLEX_MODE_P (mode))
6257 regno = FIRST_SSE_REG;
6261 if (mode == SFmode || mode == DFmode)
6262 regno = FIRST_SSE_REG;
6268 return gen_rtx_REG (orig_mode, regno);
/* Common worker: resolve FNTYPE_OR_DECL (which may be a decl or a type)
   and dispatch to the per-ABI function-value routine.  */
6272 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6273 enum machine_mode orig_mode, enum machine_mode mode)
6275 const_tree fn, fntype;
6278 if (fntype_or_decl && DECL_P (fntype_or_decl))
6279 fn = fntype_or_decl;
6280 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6282 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6283 return function_value_ms_64 (orig_mode, mode);
6284 else if (TARGET_64BIT)
6285 return function_value_64 (orig_mode, mode, valtype);
6287 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize the value type to its natural
   mode and defer to the common worker.  */
6291 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6292 bool outgoing ATTRIBUTE_UNUSED)
6294 enum machine_mode mode, orig_mode;
6296 orig_mode = TYPE_MODE (valtype);
6297 mode = type_natural_mode (valtype, NULL);
6298 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* LIBCALL_VALUE hook: libcalls have no type node, so pass NULL and use
   MODE for both original and natural mode.  */
6302 ix86_libcall_value (enum machine_mode mode)
6304 return ix86_function_value_1 (NULL, NULL, mode, mode);
6307 /* Return true iff type is returned in memory. */
/* 32-bit variant.  NOTE(review): lossy extraction; the branch conditions
   that select the MMX/SSE/AVX cases are partially missing.  */
6309 static int ATTRIBUTE_UNUSED
6310 return_in_memory_32 (const_tree type, enum machine_mode mode)
6314 if (mode == BLKmode)
6317 size = int_size_in_bytes (type);
/* Some targets return small aggregates (<= 8 bytes) in registers.  */
6319 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6322 if (VECTOR_MODE_P (mode) || mode == TImode)
6324 /* User-created vectors small enough to fit in EAX. */
6328 /* MMX/3dNow values are returned in MM0,
6329 except when it doesn't exits. */
6331 return (TARGET_MMX ? 0 : 1);
6333 /* SSE values are returned in XMM0, except when it doesn't exist. */
6335 return (TARGET_SSE ? 0 : 1);
6337 /* AVX values are returned in YMM0, except when it doesn't exist. */
6339 return TARGET_AVX ? 0 : 1;
6348 /* OImode shouldn't be used directly. */
6349 gcc_assert (mode != OImode);
/* SysV x86-64: a value is returned in memory exactly when
   examine_argument cannot classify it into registers.  */
6354 static int ATTRIBUTE_UNUSED
6355 return_in_memory_64 (const_tree type, enum machine_mode mode)
6357 int needed_intregs, needed_sseregs;
6358 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: only 16-byte non-complex int/vector values (__m128) and
   sizes of exactly 1, 2, 4 or 8 bytes come back in registers; everything
   else is returned in memory.  */
6361 static int ATTRIBUTE_UNUSED
6362 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6364 HOST_WIDE_INT size = int_size_in_bytes (type);
6366 /* __m128 is returned in xmm0. */
6367 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6368 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6371 /* Otherwise, the size must be exactly in [1248]. */
6372 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtargets may override entirely via
   SUBTARGET_RETURN_IN_MEMORY; otherwise dispatch per ABI.  */
6376 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6378 #ifdef SUBTARGET_RETURN_IN_MEMORY
6379 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6381 const enum machine_mode mode = type_natural_mode (type, NULL);
6385 if (ix86_function_type_abi (fntype) == MS_ABI)
6386 return return_in_memory_ms_64 (type, mode);
6388 return return_in_memory_64 (type, mode);
6391 return return_in_memory_32 (type, mode);
6395 /* Return false iff TYPE is returned in memory. This version is used
6396 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6397 but differs notably in that when MMX is available, 8-byte vectors
6398 are returned in memory, rather than in MMX registers. */
/* NOTE(review): lossy extraction; the returns for the TImode/XFmode
   branches are among the missing lines.  */
6401 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6404 enum machine_mode mode = type_natural_mode (type, NULL);
/* 64-bit uses the generic SysV classification.  */
6407 return return_in_memory_64 (type, mode);
6409 if (mode == BLKmode)
6412 size = int_size_in_bytes (type);
6414 if (VECTOR_MODE_P (mode))
6416 /* Return in memory only if MMX registers *are* available. This
6417 seems backwards, but it is consistent with the existing
6424 else if (mode == TImode)
6426 else if (mode == XFmode)
6432 /* When returning SSE vector types, we have a choice of either
6433 (1) being abi incompatible with a -march switch, or
6434 (2) generating an error.
6435 Given no good solution, I think the safest thing is one warning.
6436 The user won't be able to use -Werror, but....
6438 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6439 called in response to actually generating a caller or callee that
6440 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6441 via aggregate_value_p for general type probing from tree-ssa. */
6444 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot latches so each warning fires at most once.  */
6446 static bool warnedsse, warnedmmx;
6448 if (!TARGET_64BIT && type)
6450 /* Look at the return type of the function, not the function type. */
6451 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6453 if (!TARGET_SSE && !warnedsse)
6456 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6459 warning (0, "SSE vector return without SSE enabled "
6464 if (!TARGET_MMX && !warnedmmx)
6466 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6469 warning (0, "MMX vector return without MMX enabled "
6479 /* Create the va_list data type. */
6481 /* Returns the calling convention specific va_list date type.
6482 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6485 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6487 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6489 /* For i386 we use plain pointer to argument area. */
6490 if (!TARGET_64BIT || abi == MS_ABI)
6491 return build_pointer_type (char_type_node);
/* SysV x86-64: build the four-field __va_list_tag record
   (gp_offset, fp_offset, overflow_arg_area, reg_save_area).  */
6493 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6494 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6496 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6497 unsigned_type_node);
6498 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6499 unsigned_type_node);
6500 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6502 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6505 va_list_gpr_counter_field = f_gpr;
6506 va_list_fpr_counter_field = f_fpr;
6508 DECL_FIELD_CONTEXT (f_gpr) = record;
6509 DECL_FIELD_CONTEXT (f_fpr) = record;
6510 DECL_FIELD_CONTEXT (f_ovf) = record;
6511 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
6513 TREE_CHAIN (record) = type_decl;
6514 TYPE_NAME (record) = type_decl;
6515 TYPE_FIELDS (record) = f_gpr;
6516 TREE_CHAIN (f_gpr) = f_fpr;
6517 TREE_CHAIN (f_fpr) = f_ovf;
6518 TREE_CHAIN (f_ovf) = f_sav;
6520 layout_type (record);
6522 /* The correct type is an array type of one element. */
6523 return build_array_type (record, build_index_type (size_zero_node));
6526 /* Setup the builtin va_list data type and for 64-bit the additional
6527 calling convention specific va_list data types. */
6530 ix86_build_builtin_va_list (void)
6532 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6534 /* Initialize abi specific va_list builtin types. */
/* NOTE(review): lossy extraction; the TARGET_64BIT guard and the `tree t`
   declaration visible in the full source are missing here.  */
6538 if (ix86_abi == MS_ABI)
6540 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6541 if (TREE_CODE (t) != RECORD_TYPE)
6542 t = build_variant_type_copy (t);
6543 sysv_va_list_type_node = t;
6548 if (TREE_CODE (t) != RECORD_TYPE)
6549 t = build_variant_type_copy (t);
6550 sysv_va_list_type_node = t;
6552 if (ix86_abi != MS_ABI)
6554 t = ix86_build_builtin_va_list_abi (MS_ABI);
6555 if (TREE_CODE (t) != RECORD_TYPE)
6556 t = build_variant_type_copy (t);
6557 ms_va_list_type_node = t;
6562 if (TREE_CODE (t) != RECORD_TYPE)
6563 t = build_variant_type_copy (t);
6564 ms_va_list_type_node = t;
6571 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* SysV x86-64: spill unnamed-argument GP registers to the register save
   area and emit the computed-jump SSE prologue save.  NOTE(review): lossy
   extraction; several declarations/lines are missing.  */
6574 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6583 int regparm = ix86_regparm;
6585 if (cum->call_abi != ix86_abi)
6586 regparm = ix86_abi != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6588 /* GPR size of varargs save area. */
6589 if (cfun->va_list_gpr_size)
6590 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6592 ix86_varargs_gpr_size = 0;
6594 /* FPR size of varargs save area. We don't need it if we don't pass
6595 anything in SSE registers. */
6596 if (cum->sse_nregs && cfun->va_list_fpr_size)
6597 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6599 ix86_varargs_fpr_size = 0;
/* Nothing to save: bail out early.  */
6601 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6604 save_area = frame_pointer_rtx;
6605 set = get_varargs_alias_set ();
/* Spill each remaining named-parameter GP register into its save slot.  */
6607 for (i = cum->regno;
6609 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6612 mem = gen_rtx_MEM (Pmode,
6613 plus_constant (save_area, i * UNITS_PER_WORD));
6614 MEM_NOTRAP_P (mem) = 1;
6615 set_mem_alias_set (mem, set);
6616 emit_move_insn (mem, gen_rtx_REG (Pmode,
6617 x86_64_int_parameter_registers[i]));
6620 if (ix86_varargs_fpr_size)
6622 /* Now emit code to save SSE registers. The AX parameter contains number
6623 of SSE parameter registers used to call this function. We use
6624 sse_prologue_save insn template that produces computed jump across
6625 SSE saves. We need some preparation work to get this working. */
6627 label = gen_label_rtx ();
6628 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6630 /* Compute address to jump to :
6631 label - eax*4 + nnamed_sse_arguments*4 Or
6632 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6633 tmp_reg = gen_reg_rtx (Pmode);
6634 nsse_reg = gen_reg_rtx (Pmode);
6635 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6636 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6637 gen_rtx_MULT (Pmode, nsse_reg,
6640 /* vmovaps is one byte longer than movaps. */
6642 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6643 gen_rtx_PLUS (Pmode, tmp_reg,
6649 gen_rtx_CONST (DImode,
6650 gen_rtx_PLUS (DImode,
6652 GEN_INT (cum->sse_regno
6653 * (TARGET_AVX ? 5 : 4)))));
6655 emit_move_insn (nsse_reg, label_ref);
6656 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6658 /* Compute address of memory block we save into. We always use pointer
6659 pointing 127 bytes after first byte to store - this is needed to keep
6660 instruction size limited by 4 bytes (5 bytes for AVX) with one
6661 byte displacement. */
6662 tmp_reg = gen_reg_rtx (Pmode);
6663 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6664 plus_constant (save_area,
6665 ix86_varargs_gpr_size + 127)));
6666 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6667 MEM_NOTRAP_P (mem) = 1;
6668 set_mem_alias_set (mem, set);
6669 set_mem_align (mem, BITS_PER_WORD);
6671 /* And finally do the dirty job! */
6672 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6673 GEN_INT (cum->sse_regno), label));
/* Microsoft x64 varargs: spill each remaining named-parameter register
   into its caller-allocated home slot on the stack.  */
6678 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6680 alias_set_type set = get_varargs_alias_set ();
6683 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
/* Home slot for register I lives at incoming-args + I words.  */
6687 mem = gen_rtx_MEM (Pmode,
6688 plus_constant (virtual_incoming_args_rtx,
6689 i * UNITS_PER_WORD));
6690 MEM_NOTRAP_P (mem) = 1;
6691 set_mem_alias_set (mem, set);
6693 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6694 emit_move_insn (mem, reg);
/* TARGET_SETUP_INCOMING_VARARGS hook: advance past the last named
   argument (for stdarg functions) and delegate to the per-ABI worker.
   NOTE(review): lossy extraction; the 64-bit guard is missing here.  */
6699 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6700 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6703 CUMULATIVE_ARGS next_cum;
6706 /* This argument doesn't appear to be used anymore. Which is good,
6707 because the old code here didn't suppress rtl generation. */
6708 gcc_assert (!no_rtl);
6713 fntype = TREE_TYPE (current_function_decl);
6715 /* For varargs, we do not want to skip the dummy va_dcl argument.
6716 For stdargs, we do want to skip the last named argument. */
6718 if (stdarg_p (fntype))
6719 function_arg_advance (&next_cum, mode, type, 1);
6721 if (cum->call_abi == MS_ABI)
6722 setup_incoming_varargs_ms_64 (&next_cum);
6724 setup_incoming_varargs_64 (&next_cum);
6727 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE's canonical va_list is the plain-pointer (char *)
   flavor rather than the SysV record flavor.  */
6730 is_va_list_char_pointer (tree type)
6734 /* For 32-bit it is always true. */
6737 canonic = ix86_canonical_va_list_type (type);
6738 return (canonic == ms_va_list_type_node
6739 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6742 /* Implement va_start.
   Initializes the four fields of the SysV x86-64 va_list structure:
   gp_offset, fp_offset, overflow_arg_area and reg_save_area.  */
6745 ix86_va_start (tree valist, rtx nextarg)
6747 HOST_WIDE_INT words, n_gpr, n_fpr;
6748 tree f_gpr, f_fpr, f_ovf, f_sav;
6749 tree gpr, fpr, ovf, sav, t;
6752 /* Only 64bit target needs something special. */
6753 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6755 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four FIELD_DECLs out of the va_list record type.  */
6759 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6760 f_fpr = TREE_CHAIN (f_gpr);
6761 f_ovf = TREE_CHAIN (f_fpr);
6762 f_sav = TREE_CHAIN (f_ovf);
6764 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6765 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6766 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6767 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6768 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6770 /* Count number of gp and fp argument registers used. */
6771 words = crtl->args.info.words;
6772 n_gpr = crtl->args.info.regno;
6773 n_fpr = crtl->args.info.sse_regno;
/* gp_offset: each GP register slot in the save area is 8 bytes.  */
6775 if (cfun->va_list_gpr_size)
6777 type = TREE_TYPE (gpr);
6778 t = build2 (MODIFY_EXPR, type,
6779 gpr, build_int_cst (type, n_gpr * 8));
6780 TREE_SIDE_EFFECTS (t) = 1;
6781 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset: SSE slots are 16 bytes and follow the GP slots.  */
6784 if (TARGET_SSE && cfun->va_list_fpr_size)
6786 type = TREE_TYPE (fpr);
6787 t = build2 (MODIFY_EXPR, type, fpr,
6788 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6789 TREE_SIDE_EFFECTS (t) = 1;
6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6793 /* Find the overflow area. */
6794 type = TREE_TYPE (ovf);
6795 t = make_tree (type, crtl->args.internal_arg_pointer);
6797 t = build2 (POINTER_PLUS_EXPR, type, t,
6798 size_int (words * UNITS_PER_WORD));
6799 t = build2 (MODIFY_EXPR, type, ovf, t);
6800 TREE_SIDE_EFFECTS (t) = 1;
6801 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6803 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6805 /* Find the register save area.
6806 Prologue of the function save it right above stack frame. */
6807 type = TREE_TYPE (sav);
6808 t = make_tree (type, frame_pointer_rtx)
/* When no GP registers were saved, the save area starts below the
   (absent) GP slots — back the pointer up by their total size.  */;
6809 if (!ix86_varargs_gpr_size)
6810 t = build2 (POINTER_PLUS_EXPR, type, t,
6811 size_int (-8 * X86_64_REGPARM_MAX));
6812 t = build2 (MODIFY_EXPR, type, sav, t);
6813 TREE_SIDE_EFFECTS (t) = 1;
6814 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6818 /* Implement va_arg.
   Gimplifies one va_arg access for the SysV x86-64 ABI: first tries to
   fetch the value from the register save area (GP and/or SSE slots),
   falling back to the overflow (stack) area.  Returns a dereference of
   the computed address.  NOTE(review): many lines are elided in this
   extract; the remaining code is kept verbatim.  */
6821 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6824 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6825 tree f_gpr, f_fpr, f_ovf, f_sav;
6826 tree gpr, fpr, ovf, sav, t;
6828 tree lab_false, lab_over = NULL_TREE;
6833 enum machine_mode nat_mode;
6836 /* Only 64bit target needs something special. */
6837 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6838 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6840 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6841 f_fpr = TREE_CHAIN (f_gpr);
6842 f_ovf = TREE_CHAIN (f_fpr);
6843 f_sav = TREE_CHAIN (f_ovf);
6845 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6846 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6847 valist = build_va_arg_indirect_ref (valist);
6848 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6849 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6850 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Arguments passed by reference are fetched as a pointer.  */
6852 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6854 type = build_pointer_type (type);
6855 size = int_size_in_bytes (type);
6856 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6858 nat_mode = type_natural_mode (type, NULL);
6867 /* Unnamed 256bit vector mode parameters are passed on stack. */
6868 if (ix86_cfun_abi () == SYSV_ABI)
/* CONTAINER describes which registers (if any) hold the value.  */
6875 container = construct_container (nat_mode, TYPE_MODE (type),
6876 type, 0, X86_64_REGPARM_MAX,
6877 X86_64_SSE_REGPARM_MAX, intreg,
6882 /* Pull the value out of the saved registers. */
6884 addr = create_tmp_var (ptr_type_node, "addr");
6885 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6889 int needed_intregs, needed_sseregs;
6891 tree int_addr, sse_addr;
6893 lab_false = create_artificial_label ();
6894 lab_over = create_artificial_label ();
6896 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* A temporary is needed when the value's alignment exceeds what the
   save area guarantees (8 for GP slots, 16 for SSE slots).  */
6898 need_temp = (!REG_P (container)
6899 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6900 || TYPE_ALIGN (type) > 128));
6902 /* In case we are passing structure, verify that it is consecutive block
6903 on the register save area. If not we need to do moves. */
6904 if (!need_temp && !REG_P (container))
6906 /* Verify that all registers are strictly consecutive */
6907 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6911 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6913 rtx slot = XVECEXP (container, 0, i);
6914 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6915 || INTVAL (XEXP (slot, 1)) != i * 16)
6923 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6925 rtx slot = XVECEXP (container, 0, i);
6926 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6927 || INTVAL (XEXP (slot, 1)) != i * 8)
6939 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6940 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6941 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6942 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6945 /* First ensure that we fit completely in registers. */
6948 t = build_int_cst (TREE_TYPE (gpr),
6949 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6950 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6951 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6952 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6953 gimplify_and_add (t, pre_p);
6957 t = build_int_cst (TREE_TYPE (fpr),
6958 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6959 + X86_64_REGPARM_MAX * 8);
6960 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6961 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6962 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6963 gimplify_and_add (t, pre_p);
6966 /* Compute index to start of area used for integer regs. */
6969 /* int_addr = gpr + sav; */
6970 t = fold_convert (sizetype, gpr);
6971 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6972 gimplify_assign (int_addr, t, pre_p);
6976 /* sse_addr = fpr + sav; */
6977 t = fold_convert (sizetype, fpr);
6978 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6979 gimplify_assign (sse_addr, t, pre_p);
/* Misaligned or scattered values: copy each register-sized piece
   into a stack temporary and return the temporary's address.  */
6984 tree temp = create_tmp_var (type, "va_arg_tmp");
6987 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6988 gimplify_assign (addr, t, pre_p);
6990 for (i = 0; i < XVECLEN (container, 0); i++)
6992 rtx slot = XVECEXP (container, 0, i);
6993 rtx reg = XEXP (slot, 0);
6994 enum machine_mode mode = GET_MODE (reg);
6995 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6996 tree addr_type = build_pointer_type (piece_type);
6997 tree daddr_type = build_pointer_type_for_mode (piece_type,
7001 tree dest_addr, dest;
7003 if (SSE_REGNO_P (REGNO (reg)))
7005 src_addr = sse_addr;
7006 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7010 src_addr = int_addr;
7011 src_offset = REGNO (reg) * 8;
7013 src_addr = fold_convert (addr_type, src_addr);
7014 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7015 size_int (src_offset));
7016 src = build_va_arg_indirect_ref (src_addr);
7018 dest_addr = fold_convert (daddr_type, addr);
7019 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7020 size_int (INTVAL (XEXP (slot, 1))));
7021 dest = build_va_arg_indirect_ref (dest_addr);
7023 gimplify_assign (dest, src, pre_p);
/* Advance gp_offset / fp_offset past the registers just consumed.  */
7029 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7030 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7031 gimplify_assign (gpr, t, pre_p);
7036 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7037 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7038 gimplify_assign (fpr, t, pre_p);
7041 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7043 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7046 /* ... otherwise out of the overflow area. */
7048 /* When we align parameter on stack for caller, if the parameter
7049 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7050 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7051 here with caller. */
7052 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7053 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7054 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7056 /* Care for on-stack alignment if needed. */
7057 if (arg_boundary <= 64
7058 || integer_zerop (TYPE_SIZE (type)))
/* Round OVF up to the argument boundary: (ovf + align-1) & -align.  */
7062 HOST_WIDE_INT align = arg_boundary / 8;
7063 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7064 size_int (align - 1));
7065 t = fold_convert (sizetype, t);
7066 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7068 t = fold_convert (TREE_TYPE (ovf), t);
7070 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7071 gimplify_assign (addr, t, pre_p);
/* Bump the overflow pointer past the argument just fetched.  */
7073 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7074 size_int (rsize * UNITS_PER_WORD));
7075 gimplify_assign (unshare_expr (ovf), t, pre_p);
7078 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7080 ptrtype = build_pointer_type (type);
7081 addr = fold_convert (ptrtype, addr);
/* For by-reference arguments, ADDR points at a pointer; dereference
   twice to yield the value.  */
7084 addr = build_va_arg_indirect_ref (addr);
7085 return build_va_arg_indirect_ref (addr);
7088 /* Return nonzero if OPNUM's MEM should be matched
7089 in movabs* patterns.
   Rejects volatile memory unless volatile_ok is set; looks through
   PARALLEL wrappers and SUBREGs to find the MEM operand.  */
7092 ix86_check_movabs (rtx insn, int opnum)
7096 set = PATTERN (insn);
7097 if (GET_CODE (set) == PARALLEL)
7098 set = XVECEXP (set, 0, 0);
7099 gcc_assert (GET_CODE (set) == SET);
7100 mem = XEXP (set, opnum);
7101 while (GET_CODE (mem) == SUBREG)
7102 mem = SUBREG_REG (mem);
7103 gcc_assert (MEM_P (mem));
7104 return (volatile_ok || !MEM_VOLATILE_P (mem));
7107 /* Initialize the table of extra 80387 mathematical constants.
   These decimal strings correspond to the x87 fldlg2/fldln2/fldl2e/
   fldl2t/fldpi load-constant instructions; each is parsed and rounded
   to XFmode so later bitwise comparisons are exact.  */
7110 init_ext_80387_constants (void)
7112 static const char * cst[5] =
7114 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7115 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7116 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7117 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7118 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7122 for (i = 0; i < 5; i++)
7124 real_from_string (&ext_80387_constants_table[i], cst[i]);
7125 /* Ensure each constant is rounded to XFmode precision. */
7126 real_convert (&ext_80387_constants_table[i],
7127 XFmode, &ext_80387_constants_table[i]);
/* Remember that the table is now valid.  */
7130 ext_80387_constants_init = 1;
7133 /* Return true if the constant is something that can be loaded with
7134 a special instruction.
   NOTE(review): the visible returns are elided in this extract; the
   function classifies X (a CONST_DOUBLE in an x87 float mode) by which
   x87 load-constant instruction can produce it.  */
7137 standard_80387_constant_p (rtx x)
7139 enum machine_mode mode = GET_MODE (x);
7143 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7146 if (x == CONST0_RTX (mode))
7148 if (x == CONST1_RTX (mode))
7151 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7153 /* For XFmode constants, try to find a special 80387 instruction when
7154 optimizing for size or on those CPUs that benefit from them. */
7156 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7160 if (! ext_80387_constants_init)
7161 init_ext_80387_constants ();
7163 for (i = 0; i < 5; i++)
7164 if (real_identical (&r, &ext_80387_constants_table[i]))
7168 /* Load of the constant -0.0 or -1.0 will be split as
7169 fldz;fchs or fld1;fchs sequence. */
7170 if (real_isnegzero (&r))
7172 if (real_identical (&r, &dconstm1))
7178 /* Return the opcode of the special instruction to be used to load
   the 80387 constant X.  Dispatches on the classification made by
   standard_80387_constant_p; the switch cases are elided in this
   extract.  */
7182 standard_80387_constant_opcode (rtx x)
7184 switch (standard_80387_constant_p (x))
7208 /* Return the CONST_DOUBLE representing the 80387 constant that is
7209 loaded by the specified special instruction. The argument IDX
7210 matches the return value from standard_80387_constant_p. */
7213 standard_80387_constant_rtx (int idx)
7217 if (! ext_80387_constants_init)
7218 init_ext_80387_constants ();
/* Build a CONST_DOUBLE from the precomputed XFmode table entry.  */
7234 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7238 /* Return 1 if mode is a valid mode for sse.
   Body elided in this extract.  */
7240 standard_sse_mode_p (enum machine_mode mode)
7257 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
7258 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
7259 modes and AVX is enabled.
   Negative values flag the same constants when the required ISA
   extension is NOT enabled.  */
7262 standard_sse_constant_p (rtx x)
7264 enum machine_mode mode = GET_MODE (x);
7266 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7268 if (vector_all_ones_operand (x, mode))
7270 if (standard_sse_mode_p (mode))
7271 return TARGET_SSE2 ? 2 : -2;
7272 else if (VALID_AVX256_REG_MODE (mode))
7273 return TARGET_AVX ? 3 : -3;
7279 /* Return the opcode of the special instruction to be used to load
   the SSE/AVX constant X: a self-xor for all-zeros, a self-pcmpeqd
   for all-ones.  Mode cases in the switches are partially elided.  */
7283 standard_sse_constant_opcode (rtx insn, rtx x)
7285 switch (standard_sse_constant_p (x))
/* All-zeros: xor the destination with itself (AVX uses the
   three-operand v-prefixed forms).  */
7288 switch (get_attr_mode (insn))
7291 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7293 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7295 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7297 return "vxorps\t%x0, %x0, %x0";
7299 return "vxorpd\t%x0, %x0, %x0";
7301 return "vpxor\t%x0, %x0, %x0";
/* All-ones: compare the destination with itself for equality.  */
7307 switch (get_attr_mode (insn))
7312 return "vpcmpeqd\t%0, %0, %0";
7318 return "pcmpeqd\t%0, %0";
7323 /* Returns 1 if OP contains a symbol reference.
   Recursively walks the RTX according to its format string, descending
   into both 'e' (expression) and 'E' (vector) operands.  */
7326 symbolic_reference_mentioned_p (rtx op)
7331 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7334 fmt = GET_RTX_FORMAT (GET_CODE (op));
7335 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7341 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7342 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7346 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7353 /* Return 1 if it is appropriate to emit `ret' instructions in the
7354 body of a function. Do this only if the epilogue is simple, needing a
7355 couple of insns. Prior to reloading, we can't tell how many registers
7356 must be saved, so return 0 then. Return 0 if there is no frame
7357 marker to de-allocate. */
7360 ix86_can_use_return_insn_p (void)
7362 struct ix86_frame frame;
7364 if (! reload_completed || frame_pointer_needed)
7367 /* Don't allow more than 32 pop, since that's all we can do
7368 with one instruction. */
7369 if (crtl->args.pops_args
7370 && crtl->args.size >= 32768)
/* A bare `ret' is only valid when nothing needs deallocating and no
   registers (GP or SSE) need restoring.  */
7373 ix86_compute_frame_layout (&frame);
7374 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7377 /* Value should be nonzero if functions must have frame pointers.
7378 Zero means the frame pointer need not be set up (and parms may
7379 be accessed via the stack pointer) in functions that seem suitable. */
7382 ix86_frame_pointer_required (void)
7384 /* If we accessed previous frames, then the generated code expects
7385 to be able to access the saved ebp value in our frame. */
7386 if (cfun->machine->accesses_prev_frame)
7389 /* Several x86 os'es need a frame pointer for other reasons,
7390 usually pertaining to setjmp. */
7391 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7394 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7395 the frame pointer by default. Turn it back on now if we've not
7396 got a leaf function. */
7397 if (TARGET_OMIT_LEAF_FRAME_POINTER
7398 && (!current_function_is_leaf
/* TLS descriptor calls make the function effectively non-leaf.  */
7399 || ix86_current_function_calls_tls_descriptor))
7408 /* Record that the current function accesses previous call frames. */
7411 ix86_setup_frame_addresses (void)
7413 cfun->machine->accesses_prev_frame = 1;
/* USE_HIDDEN_LINKONCE: emit pc-thunks as hidden COMDAT functions when
   the assembler/OS supports it; otherwise emit per-file local labels.  */
7416 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7417 # define USE_HIDDEN_LINKONCE 1
7419 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers for which a pc thunk has been requested.  */
7422 static int pic_labels_used;
7424 /* Fills in the label name that should be used for a pc thunk for
7425 the given register. */
7428 get_pc_thunk_name (char name[32], unsigned int regno)
7430 gcc_assert (!TARGET_64BIT);
7432 if (USE_HIDDEN_LINKONCE)
7433 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7435 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7439 /* This function generates code for -fpic that loads %ebx with
7440 the return address of the caller and then returns.
   Runs at end of assembly output: emits one get-pc thunk per register
   recorded in pic_labels_used.  Several branches (Darwin vs. ELF vs.
   plain text section) are partially elided in this extract.  */
7443 ix86_file_end (void)
7448 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
7452 if (! ((pic_labels_used >> regno) & 1))
7455 get_pc_thunk_name (name, regno);
7460 switch_to_section (darwin_sections[text_coal_section]);
7461 fputs ("\t.weak_definition\t", asm_out_file);
7462 assemble_name (asm_out_file, name);
7463 fputs ("\n\t.private_extern\t", asm_out_file);
7464 assemble_name (asm_out_file, name);
7465 fputs ("\n", asm_out_file);
7466 ASM_OUTPUT_LABEL (asm_out_file, name);
7470 if (USE_HIDDEN_LINKONCE)
7474 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7476 TREE_PUBLIC (decl) = 1;
7477 TREE_STATIC (decl) = 1;
7478 DECL_ONE_ONLY (decl) = 1;
7480 (*targetm.asm_out.unique_section) (decl, 0);
7481 switch_to_section (get_named_section (decl, NULL, 0));
7483 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7484 fputs ("\t.hidden\t", asm_out_file);
7485 assemble_name (asm_out_file, name);
7486 fputc ('\n', asm_out_file);
7487 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7491 switch_to_section (text_section);
7492 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: mov (%esp), %reg; ret — loads the return address.  */
7495 xops[0] = gen_rtx_REG (Pmode, regno);
7496 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7497 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7498 output_asm_insn ("ret", xops);
7501 if (NEED_INDICATE_EXEC_STACK)
7502 file_end_indicate_exec_stack ();
7505 /* Emit code for the SET_GOT patterns.
   Loads DEST with the address of the GOT: via VxWorks RTP globals, a
   call/pop sequence, or a pc-thunk call, then adds the GOT offset.  */
7508 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7514 if (TARGET_VXWORKS_RTP && flag_pic)
7516 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7517 xops[2] = gen_rtx_MEM (Pmode,
7518 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7519 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7521 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7522 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7523 an unadorned address. */
7524 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7525 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7526 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7530 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Without deep branch prediction the call/pop idiom is acceptable;
   otherwise prefer the pc-thunk to keep the return stack balanced.  */
7532 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7534 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7537 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7539 output_asm_insn ("call\t%a2", xops);
7542 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7543 is what will be referenced by the Mach-O PIC subsystem. */
7545 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7548 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7549 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7552 output_asm_insn ("pop%z0\t%0", xops);
/* pc-thunk path: call the per-register thunk and record its use so
   ix86_file_end emits the thunk body.  */
7557 get_pc_thunk_name (name, REGNO (dest));
7558 pic_labels_used |= 1 << REGNO (dest);
7560 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7561 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7562 output_asm_insn ("call\t%X2", xops);
7563 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7564 is what will be referenced by the Mach-O PIC subsystem. */
7567 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7569 targetm.asm_out.internal_label (asm_out_file, "L",
7570 CODE_LABEL_NUMBER (label));
/* Finally add the displacement from the pc label to the GOT.  */
7577 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7578 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7580 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7585 /* Generate an "push" pattern for input ARG.
   Builds (set (mem (pre_dec sp)) arg) — a stack push as RTL.  */
7590 return gen_rtx_SET (VOIDmode,
7592 gen_rtx_PRE_DEC (Pmode,
7593 stack_pointer_rtx)),
7597 /* Return >= 0 if there is an unused call-clobbered register available
7598 for the entire function.
   Only considered for leaf, unprofiled functions that make no TLS
   descriptor calls; scans eax/ecx/edx (regnos 2..0).  */
7601 ix86_select_alt_pic_regnum (void)
7603 if (current_function_is_leaf && !crtl->profile
7604 && !ix86_current_function_calls_tls_descriptor)
7607 /* Can't use the same register for both PIC and DRAP. */
7609 drap = REGNO (crtl->drap_reg);
7612 for (i = 2; i >= 0; --i)
7613 if (i != drap && !df_regs_ever_live_p (i))
7617 return INVALID_REGNUM;
7620 /* Return 1 if we need to save REGNO.
   MAYBE_EH_RETURN additionally forces saving of the EH return data
   registers when the function calls __builtin_eh_return.  */
7622 ix86_save_reg (unsigned int regno, int maybe_eh_return)
/* The PIC register must be saved if it is live and cannot be replaced
   by an alternate unused call-clobbered register.  */
7624 if (pic_offset_table_rtx
7625 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7626 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7628 || crtl->calls_eh_return
7629 || crtl->uses_const_pool))
7631 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7636 if (crtl->calls_eh_return && maybe_eh_return)
7641 unsigned test = EH_RETURN_DATA_REGNO (i);
7642 if (test == INVALID_REGNUM)
7650 && regno == REGNO (crtl->drap_reg))
/* Default: save call-saved, non-fixed registers that are live, except
   the hard frame pointer when it is set up by the prologue anyway.  */
7653 return (df_regs_ever_live_p (regno)
7654 && !call_used_regs[regno]
7655 && !fixed_regs[regno]
7656 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7659 /* Return number of saved general prupose registers. */
7662 ix86_nsaved_regs (void)
7667 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7668 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7673 /* Return number of saved SSE registrers.
   Only the MS ABI treats xmm6-xmm15 as call-saved, so the answer is
   zero for SysV functions.  */
7676 ix86_nsaved_sseregs (void)
7681 if (ix86_cfun_abi () != MS_ABI)
7683 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7684 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7689 /* Given FROM and TO register numbers, say whether this elimination is
7690 allowed. If stack alignment is needed, we can only replace argument
7691 pointer with hard frame pointer, or replace frame pointer with stack
7692 pointer. Otherwise, frame pointer elimination is automatically
7693 handled and all other eliminations are valid. */
7696 ix86_can_eliminate (int from, int to)
7698 if (stack_realign_fp)
7699 return ((from == ARG_POINTER_REGNUM
7700 && to == HARD_FRAME_POINTER_REGNUM)
7701 || (from == FRAME_POINTER_REGNUM
7702 && to == STACK_POINTER_REGNUM));
/* Eliminating to the stack pointer is only valid without a frame
   pointer; everything else is always permitted.  */
7704 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7707 /* Return the offset between two registers, one to be eliminated, and the other
7708 its replacement, at the start of a routine.
   Delegates to ix86_compute_frame_layout and reads the relevant offsets
   out of the computed ix86_frame.  */
7711 ix86_initial_elimination_offset (int from, int to)
7713 struct ix86_frame frame;
7714 ix86_compute_frame_layout (&frame);
7716 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7717 return frame.hard_frame_pointer_offset;
7718 else if (from == FRAME_POINTER_REGNUM
7719 && to == HARD_FRAME_POINTER_REGNUM)
7720 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
/* Remaining cases must eliminate to the stack pointer.  */
7723 gcc_assert (to == STACK_POINTER_REGNUM);
7725 if (from == ARG_POINTER_REGNUM)
7726 return frame.stack_pointer_offset;
7728 gcc_assert (from == FRAME_POINTER_REGNUM);
7729 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7733 /* In a dynamically-aligned function, we can't know the offset from
7734 stack pointer to frame pointer, so we must ensure that setjmp
7735 eliminates fp against the hard fp (%ebp) rather than trying to
7736 index from %esp up to the top of the frame across a gap that is
7737 of unknown (at compile-time) size. */
7739 ix86_builtin_setjmp_frame_value (void)
7741 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7744 /* Fill structure ix86_frame about frame of currently computed function.
   Lays out, from the return address downward: saved GP regs, padded
   SSE reg save area, varargs save area, alignment padding, locals,
   outgoing argument area, and finally the red zone adjustment.
   NOTE(review): several lines (including the debug-dump guard) are
   elided in this extract; code is kept verbatim.  */
7747 ix86_compute_frame_layout (struct ix86_frame *frame)
7749 HOST_WIDE_INT total_size;
7750 unsigned int stack_alignment_needed;
7751 HOST_WIDE_INT offset;
7752 unsigned int preferred_alignment;
7753 HOST_WIDE_INT size = get_frame_size ();
7755 frame->nregs = ix86_nsaved_regs ();
7756 frame->nsseregs = ix86_nsaved_sseregs ();
7759 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7760 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7762 /* MS ABI seem to require stack alignment to be always 16 except for function
7764 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7766 preferred_alignment = 16;
7767 stack_alignment_needed = 16;
7768 crtl->preferred_stack_boundary = 128;
7769 crtl->stack_alignment_needed = 128;
7772 gcc_assert (!size || stack_alignment_needed);
7773 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7774 gcc_assert (preferred_alignment <= stack_alignment_needed);
7776 /* During reload iteration the amount of registers saved can change.
7777 Recompute the value as needed. Do not recompute when amount of registers
7778 didn't change as reload does multiple calls to the function and does not
7779 expect the decision to change within single iteration. */
7780 if (!optimize_function_for_size_p (cfun)
7781 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7783 int count = frame->nregs;
7785 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7786 /* The fast prologue uses move instead of push to save registers. This
7787 is significantly longer, but also executes faster as modern hardware
7788 can execute the moves in parallel, but can't do that for push/pop.
7790 Be careful about choosing what prologue to emit: When function takes
7791 many instructions to execute we may use slow version as well as in
7792 case function is known to be outside hot spot (this is known with
7793 feedback only). Weight the size of function by number of registers
7794 to save as it is cheap to use one or two push instructions but very
7795 slow to use many of them. */
7797 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7798 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7799 || (flag_branch_probabilities
7800 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7801 cfun->machine->use_fast_prologue_epilogue = false;
7803 cfun->machine->use_fast_prologue_epilogue
7804 = !expensive_function_p (count);
7806 if (TARGET_PROLOGUE_USING_MOVE
7807 && cfun->machine->use_fast_prologue_epilogue)
7808 frame->save_regs_using_mov = true;
7810 frame->save_regs_using_mov = false;
7813 /* Skip return address and saved base pointer. */
7814 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7816 frame->hard_frame_pointer_offset = offset;
7818 /* Set offset to aligned because the realigned frame starts from
7820 if (stack_realign_fp)
7821 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7823 /* Register save area */
7824 offset += frame->nregs * UNITS_PER_WORD;
7826 /* Align SSE reg save area. */
7827 if (frame->nsseregs)
7828 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7830 frame->padding0 = 0;
7832 /* SSE register save area. */
7833 offset += frame->padding0 + frame->nsseregs * 16;
7836 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7837 offset += frame->va_arg_size;
7839 /* Align start of frame for local function. */
7840 frame->padding1 = ((offset + stack_alignment_needed - 1)
7841 & -stack_alignment_needed) - offset;
7843 offset += frame->padding1;
7845 /* Frame pointer points here. */
7846 frame->frame_pointer_offset = offset;
7850 /* Add outgoing arguments area. Can be skipped if we eliminated
7851 all the function calls as dead code.
7852 Skipping is however impossible when function calls alloca. Alloca
7853 expander assumes that last crtl->outgoing_args_size
7854 of stack frame are unused. */
7855 if (ACCUMULATE_OUTGOING_ARGS
7856 && (!current_function_is_leaf || cfun->calls_alloca
7857 || ix86_current_function_calls_tls_descriptor))
7859 offset += crtl->outgoing_args_size;
7860 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7863 frame->outgoing_arguments_size = 0;
7865 /* Align stack boundary. Only needed if we're calling another function
7867 if (!current_function_is_leaf || cfun->calls_alloca
7868 || ix86_current_function_calls_tls_descriptor)
7869 frame->padding2 = ((offset + preferred_alignment - 1)
7870 & -preferred_alignment) - offset;
7872 frame->padding2 = 0;
7874 offset += frame->padding2;
7876 /* We've reached end of stack frame. */
7877 frame->stack_pointer_offset = offset;
7879 /* Size prologue needs to allocate. */
7880 frame->to_allocate =
7881 (size + frame->padding1 + frame->padding2
7882 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Small frames and huge 64-bit frames cannot use mov-based saves.  */
7884 if ((!frame->to_allocate && frame->nregs <= 1)
7885 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7886 frame->save_regs_using_mov = false;
/* Red zone: leaf SysV functions may use up to 128 bytes below %rsp
   without adjusting it; fold the allocation into that window.  */
7888 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
7889 && current_function_is_leaf
7890 && !ix86_current_function_calls_tls_descriptor)
7892 frame->red_zone_size = frame->to_allocate;
7893 if (frame->save_regs_using_mov)
7894 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7895 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7896 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7899 frame->red_zone_size = 0;
7900 frame->to_allocate -= frame->red_zone_size;
7901 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (guard elided in this extract).  */
7903 fprintf (stderr, "\n");
7904 fprintf (stderr, "size: %ld\n", (long)size);
7905 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7906 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7907 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7908 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7909 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7910 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7911 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7912 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7913 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7914 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7915 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7916 (long)frame->hard_frame_pointer_offset);
7917 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7918 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7919 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7920 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7924 /* Emit code to save registers in the prologue.
   Push-based save of all call-saved GP registers, highest regno first;
   each insn is marked frame-related for DWARF CFI.  */
7927 ix86_emit_save_regs (void)
7932 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7933 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7935 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7936 RTX_FRAME_RELATED_P (insn) = 1;
7940 /* Emit code to save registers using MOV insns. First register
7941 is restored from POINTER + OFFSET. */
7943 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7949 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7951 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7953 gen_rtx_REG (Pmode, regno));
7954 RTX_FRAME_RELATED_P (insn) = 1;
7955 offset += UNITS_PER_WORD;
7959 /* Emit code to save registers using MOV insns. First register
7960 is restored from POINTER + OFFSET.
   SSE variant: 16-byte TImode slots, 128-bit aligned.  */
7962 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7969 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7971 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
7972 set_mem_align (mem, 128);
7973 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
7974 RTX_FRAME_RELATED_P (insn) = 1;
7979 /* Expand prologue or epilogue stack adjustment.
7980 The pattern exist to put a dependency on all ebp-based memory accesses.
7981 STYLE should be negative if instructions should be marked as frame related,
7982 zero if %r11 register is live and cannot be freely used and positive
   otherwise (then %r11 may be used as a scratch for huge offsets).  */
7986 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
7991 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
7992 else if (x86_64_immediate_operand (offset, DImode))
7993 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
7997 /* r11 is used by indirect sibcall return as well, set before the
7998 epilogue and used after the epilogue. ATM indirect sibcall
7999 shouldn't be used together with huge frame sizes in one
8000 function because of the frame_size check in sibcall.c. */
/* Offset does not fit a 32-bit immediate: materialize it in %r11.  */
8002 r11 = gen_rtx_REG (DImode, R11_REG);
8003 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8005 RTX_FRAME_RELATED_P (insn) = 1;
8006 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8010 RTX_FRAME_RELATED_P (insn) = 1;
8013 /* Find an available register to be used as dynamic realign argument
8014    pointer register.  Such a register will be written in prologue and
8015    used in begin of body, so it must not be
8016    1. parameter passing register.
8018    We reuse static-chain register if it is available.  Otherwise, we
8019    use DI for i386 and R13 for x86-64.  We chose R13 since it has
8022    Return: the regno of chosen register.  */
/* NOTE(review): incomplete listing -- the return statements and the 64-bit/
   32-bit split are elided.  Presumably the first branch below is the 64-bit
   path (R13) and the second the 32-bit path (DI) -- confirm against full
   source.  */
8025 find_drap_reg (void)
8027   tree decl = cfun->decl;
8031       /* Use R13 for nested function or function need static chain.
8032 	 Since function with tail call may use any caller-saved
8033 	 registers in epilogue, DRAP must not use caller-saved
8034 	 register in such case.  */
8035       if ((decl_function_context (decl)
8036 	   && !DECL_NO_STATIC_CHAIN (decl))
8037 	  || crtl->tail_call_emit)
8044       /* Use DI for nested function or function need static chain.
8045 	 Since function with tail call may use any caller-saved
8046 	 registers in epilogue, DRAP must not use caller-saved
8047 	 register in such case.  */
8048       if ((decl_function_context (decl)
8049 	   && !DECL_NO_STATIC_CHAIN (decl))
8050 	  || crtl->tail_call_emit)
8053       /* Reuse static chain register if it isn't used for parameter
8055       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8056 	  && !lookup_attribute ("fastcall",
8057     				TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8064 /* Update incoming stack boundary and estimated stack alignment.  */
/* NOTE(review): incomplete listing (braces and the x86-64 vararg condition
   head are elided).  Computes ix86_incoming_stack_boundary from the command
   line default, then lowers it for force_align_arg_pointer and for main(),
   and raises it to at least crtl->parm_stack_boundary.  */
8067 ix86_update_stack_boundary (void)
8069   /* Prefer the one specified at command line.  */
8070   ix86_incoming_stack_boundary
8071     = (ix86_user_incoming_stack_boundary
8072        ? ix86_user_incoming_stack_boundary
8073        : ix86_default_incoming_stack_boundary);
8075   /* Incoming stack alignment can be changed on individual functions
8076      via force_align_arg_pointer attribute.  We use the smallest
8077      incoming stack boundary.  */
8078   if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8079       && lookup_attribute (ix86_force_align_arg_pointer_string,
8080 			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8081     ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8083   /* The incoming stack frame has to be aligned at least at
8084      parm_stack_boundary.  */
8085   if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8086     ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8088   /* Stack at entrance of main is aligned by runtime.  We use the
8089      smallest incoming stack boundary.  */
8090   if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8091       && DECL_NAME (current_function_decl)
8092       && MAIN_NAME_P (DECL_NAME (current_function_decl))
8093       && DECL_FILE_SCOPE_P (current_function_decl))
8094     ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8096   /* x86_64 vararg needs 16byte stack alignment for register save
8100       && crtl->stack_alignment_estimated < 128)
8101     crtl->stack_alignment_estimated = 128;
8104 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8105    needed or an rtx for DRAP otherwise.  */
/* NOTE(review): incomplete listing -- the local declarations, the sequence
   start/end around copy_to_reg, and the return paths are elided.  When the
   stack is realigned via DRAP, picks a DRAP register, copies it into a
   virtual register at function entry, and returns that vDRAP.  */
8108 ix86_get_drap_rtx (void)
8110   if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8111     crtl->need_drap = true;
8113   if (stack_realign_drap)
8115       /* Assign DRAP to vDRAP and returns vDRAP */
8116       unsigned int regno = find_drap_reg ();
8121       arg_ptr = gen_rtx_REG (Pmode, regno);
8122       crtl->drap_reg = arg_ptr;
8125       drap_vreg = copy_to_reg (arg_ptr);
8129       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8130       RTX_FRAME_RELATED_P (insn) = 1;
8137 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
/* NOTE(review): return type line elided in this listing; the body simply
   returns the virtual incoming-arguments pointer.  */
8140 ix86_internal_arg_pointer (void)
8142   return virtual_incoming_args_rtx;
8145 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8146    This is called from dwarf2out.c to emit call frame instructions
8147    for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs.  */
/* NOTE(review): incomplete listing -- the switch head, break statements and
   default case are elided.  Dispatches on the UNSPEC code inside PATTERN's
   SET_SRC and forwards to the matching dwarf2out_* emitter.  */
8149 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8151   rtx unspec = SET_SRC (pattern);
8152   gcc_assert (GET_CODE (unspec) == UNSPEC);
8156     case UNSPEC_REG_SAVE:
8157       dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8158 			      SET_DEST (pattern));
8160     case UNSPEC_DEF_CFA:
8161       dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8162 			 INTVAL (XVECEXP (unspec, 0, 0)));
8169 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8170    to be generated in correct form.  */
/* NOTE(review): incomplete listing (braces and early-return body elided).
   Decides, after reload, whether stack realignment is really needed by
   comparing the effective incoming boundary against the alignment the
   function requires; once finalized the decision must not change, hence
   the assert on the re-entry path.  */
8172 ix86_finalize_stack_realign_flags (void)
8174   /* Check if stack realign is really needed after reload, and
8175      stores result in cfun */
8176   unsigned int incoming_stack_boundary
8177     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8178        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8179   unsigned int stack_realign = (incoming_stack_boundary
8180 				< (current_function_is_leaf
8181 				   ? crtl->max_used_stack_slot_alignment
8182 				   : crtl->stack_alignment_needed));
8184   if (crtl->stack_realign_finalized)
8186       /* After stack_realign_needed is finalized, we can no longer
8188       gcc_assert (crtl->stack_realign_needed == stack_realign);
8192       crtl->stack_realign_needed = stack_realign;
8193       crtl->stack_realign_finalized = true;
8197 /* Expand the prologue into a bunch of separate insns.  */
/* NOTE(review): this listing is heavily elided -- local declarations, many
   braces, several condition heads and whole statements are missing; do not
   treat the visible lines as a complete control-flow picture.  High-level
   structure (confirm against full source): (1) DRAP setup + stack align,
   (2) frame-pointer push/establish, (3) optional fp-based realign,
   (4) register saves (push or MOV flavors), (5) stack allocation, possibly
   via a stack-probing worker on Windows ABIs, (6) PIC register setup,
   (7) misc fixups (drap without realign, redzone blockage, CLD).  */
8200 ix86_expand_prologue (void)
8204   struct ix86_frame frame;
8205   HOST_WIDE_INT allocate;
8207   ix86_finalize_stack_realign_flags ();
8209   /* DRAP should not coexist with stack_realign_fp */
8210   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8212   ix86_compute_frame_layout (&frame);
8214   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8215      of DRAP is needed and stack realignment is really needed after reload */
8216   if (crtl->drap_reg && crtl->stack_realign_needed)
8219       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8220       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8221 			      ? 0 : UNITS_PER_WORD);
8223       gcc_assert (stack_realign_drap);
8225       /* Grab the argument pointer.  */
8226       x = plus_constant (stack_pointer_rtx,
8227                          (UNITS_PER_WORD + param_ptr_offset));
8230       /* Only need to push parameter pointer reg if it is caller
8232       if (!call_used_regs[REGNO (crtl->drap_reg)])
8234 	  /* Push arg pointer reg */
8235 	  insn = emit_insn (gen_push (y));
8236 	  RTX_FRAME_RELATED_P (insn) = 1;
8239       insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8240       RTX_FRAME_RELATED_P (insn) = 1;
8242       /* Align the stack.  */
8243       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8245 					   GEN_INT (-align_bytes)));
8246       RTX_FRAME_RELATED_P (insn) = 1;
8248       /* Replicate the return address on the stack so that return
8249 	 address can be reached via (argp - 1) slot.  This is needed
8250 	 to implement macro RETURN_ADDR_RTX and intrinsic function
8251 	 expand_builtin_return_addr etc.  */
8253       x = gen_frame_mem (Pmode,
8254                          plus_constant (x, -UNITS_PER_WORD));
8255       insn = emit_insn (gen_push (x));
8256       RTX_FRAME_RELATED_P (insn) = 1;
8259   /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8260      slower on all targets.  Also sdb doesn't like it.  */
8262   if (frame_pointer_needed)
8264       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8265       RTX_FRAME_RELATED_P (insn) = 1;
8267       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8268       RTX_FRAME_RELATED_P (insn) = 1;
8271   if (stack_realign_fp)
8273       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8274       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8276       /* Align the stack.  */
8277       insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8279 					   GEN_INT (-align_bytes)));
8280       RTX_FRAME_RELATED_P (insn) = 1;
8283   allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8285   if (!frame.save_regs_using_mov)
8286     ix86_emit_save_regs ();
8288     allocate += frame.nregs * UNITS_PER_WORD;
8290   /* When using red zone we may start register saving before allocating
8291      the stack frame saving one cycle of the prologue.  However I will
8292      avoid doing this if I am going to have to probe the stack since
8293      at least on x86_64 the stack probe can turn into a call that clobbers
8294      a red zone location */
8295   if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8296       && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8297     ix86_emit_save_regs_using_mov ((frame_pointer_needed
8298 				     && !crtl->stack_realign_needed)
8299                                    ? hard_frame_pointer_rtx
8300 				   : stack_pointer_rtx,
8301 				   -frame.nregs * UNITS_PER_WORD);
8305   else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8306     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8307 			       GEN_INT (-allocate), -1);
8310       /* Only valid for Win32.  */
8311       rtx eax = gen_rtx_REG (Pmode, AX_REG);
8315       gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8317       if (cfun->machine->call_abi == MS_ABI)
8320 	eax_live = ix86_eax_live_at_start_p ();
8324 	  emit_insn (gen_push (eax));
8325 	  allocate -= UNITS_PER_WORD;
8328       emit_move_insn (eax, GEN_INT (allocate));
8331 	insn = gen_allocate_stack_worker_64 (eax, eax);
8333 	insn = gen_allocate_stack_worker_32 (eax, eax);
8334       insn = emit_insn (insn);
8335       RTX_FRAME_RELATED_P (insn) = 1;
8336       t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8337       t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8338       add_reg_note (insn, REG_FRAME_RELATED_EXPR, t);
8342 	  if (frame_pointer_needed)
8343 	    t = plus_constant (hard_frame_pointer_rtx,
8346 			       - frame.nregs * UNITS_PER_WORD);
8348 	    t = plus_constant (stack_pointer_rtx, allocate);
8349 	  emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8353   if (frame.save_regs_using_mov
8354       && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8355          && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8357       if (!frame_pointer_needed
8358 	  || !frame.to_allocate
8359 	  || crtl->stack_realign_needed)
8360 	ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8362 				       + frame.nsseregs * 16 + frame.padding0);
8364 	ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8365 				       -frame.nregs * UNITS_PER_WORD);
8367   if (!frame_pointer_needed
8368       || !frame.to_allocate
8369       || crtl->stack_realign_needed)
8370     ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8373     ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8374 				       - frame.nregs * UNITS_PER_WORD
8375 				       - frame.nsseregs * 16
8378   pic_reg_used = false;
8379   if (pic_offset_table_rtx
8380       && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8383       unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8385       if (alt_pic_reg_used != INVALID_REGNUM)
8386 	SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8388       pic_reg_used = true;
8395 	  if (ix86_cmodel == CM_LARGE_PIC)
8397 	      rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8398 	      rtx label = gen_label_rtx ();
8400 	      LABEL_PRESERVE_P (label) = 1;
8401 	      gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8402 	      insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8403 	      insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8404 	      insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8405 					    pic_offset_table_rtx, tmp_reg));
8408 	    insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8411 	insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8414   /* In the pic_reg_used case, make sure that the got load isn't deleted
8415      when mcount needs it.  Blockage to avoid call movement across mcount
8416      call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8418   if (crtl->profile && pic_reg_used)
8419     emit_insn (gen_prologue_use (pic_offset_table_rtx));
8421   if (crtl->drap_reg && !crtl->stack_realign_needed)
8423       /* vDRAP is setup but after reload it turns out stack realign
8424          isn't necessary, here we will emit prologue to setup DRAP
8425          without stack realign adjustment */
8426       int drap_bp_offset = UNITS_PER_WORD * 2;
8427       rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8428       insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8431   /* Prevent instructions from being scheduled into register save push
8432      sequence when access to the redzone area is done through frame pointer.
8433      The offset between the frame pointer and the stack pointer is calculated
8434      relative to the value of the stack pointer at the end of the function
8435      prologue, and moving instructions that access redzone area via frame
8436      pointer inside push sequence violates this assumption.  */
8437   if (frame_pointer_needed && frame.red_zone_size)
8438     emit_insn (gen_memory_blockage ());
8440   /* Emit cld instruction if stringops are used in the function.  */
8441   if (TARGET_CLD && ix86_current_function_needs_cld)
8442     emit_insn (gen_cld ());
8445 /* Emit code to restore saved registers using MOV insns.  First register
8446    is restored from POINTER + OFFSET.  */
/* NOTE(review): incomplete listing (signature line, braces elided).
   Mirror of ix86_emit_save_regs_using_mov; for x86-64, offsets outside the
   signed 32-bit displacement range are handled by materializing
   POINTER + OFFSET in r11 first.  */
8448 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8449 				  int maybe_eh_return)
8452   rtx base_address = gen_rtx_MEM (Pmode, pointer);
8454   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8455     if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8457 	/* Ensure that adjust_address won't be forced to produce pointer
8458 	   out of range allowed by x86-64 instruction set.  */
8459 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8463 	    r11 = gen_rtx_REG (DImode, R11_REG);
8464 	    emit_move_insn (r11, GEN_INT (offset));
8465 	    emit_insn (gen_adddi3 (r11, r11, pointer));
8466 	    base_address = gen_rtx_MEM (Pmode, r11);
8469 	emit_move_insn (gen_rtx_REG (Pmode, regno),
8470 			adjust_address (base_address, Pmode, offset));
8471 	offset += UNITS_PER_WORD;
8475 /* Emit code to restore saved SSE registers using MOV insns.  First register
8476    is restored from POINTER + OFFSET.  */
/* NOTE(review): incomplete listing.  TImode/128-bit-aligned analogue of the
   integer restore routine above; the offset advance after each load is
   elided in this view.  */
8478 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8479 				      int maybe_eh_return)
8482   rtx base_address = gen_rtx_MEM (TImode, pointer);
8485   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8486     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8488 	/* Ensure that adjust_address won't be forced to produce pointer
8489 	   out of range allowed by x86-64 instruction set.  */
8490 	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8494 	    r11 = gen_rtx_REG (DImode, R11_REG);
8495 	    emit_move_insn (r11, GEN_INT (offset));
8496 	    emit_insn (gen_adddi3 (r11, r11, pointer));
8497 	    base_address = gen_rtx_MEM (TImode, r11);
8500 	mem = adjust_address (base_address, TImode, offset);
8501 	set_mem_align (mem, 128);
8502 	emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8507 /* Restore function stack, frame, and registers.  */
/* NOTE(review): heavily elided listing -- locals, many braces and several
   condition heads are missing.  STYLE appears to select epilogue flavor
   (style == 2 is the eh_return path; sibcall epilogues skip the return
   insn) -- confirm against full source.  Two main strategies: MOV-based
   restores (fast prologue/epilogue or eh_return), or pop-based restores
   after deallocating the frame.  */
8510 ix86_expand_epilogue (int style)
8514   struct ix86_frame frame;
8515   HOST_WIDE_INT offset;
8517   ix86_finalize_stack_realign_flags ();
8519   /* When stack is realigned, SP must be valid.  */
8520   sp_valid = (!frame_pointer_needed
8521 	      || current_function_sp_is_unchanging
8522 	      || stack_realign_fp);
8524   ix86_compute_frame_layout (&frame);
8526   /* See the comment about red zone and frame
8527      pointer usage in ix86_expand_prologue.  */
8528   if (frame_pointer_needed && frame.red_zone_size)
8529     emit_insn (gen_memory_blockage ());
8531   /* Calculate start of saved registers relative to ebp.  Special care
8532      must be taken for the normal return case of a function using
8533      eh_return: the eax and edx registers are marked as saved, but not
8534      restored along this path.  */
8535   offset = frame.nregs;
8536   if (crtl->calls_eh_return && style != 2)
8538   offset *= -UNITS_PER_WORD;
8539   offset -= frame.nsseregs * 16 + frame.padding0;
8541   /* If we're only restoring one register and sp is not valid then
8542      using a move instruction to restore the register since it's
8543      less work than reloading sp and popping the register.
8545      The default code results in stack adjustment using add/lea instruction,
8546      while this code results in LEAVE instruction (or discrete equivalent),
8547      so it is profitable in some other cases as well.  Especially when there
8548      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8549      and there is exactly one register to pop.  This heuristic may need some
8550      tuning in future.  */
8551   if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8552       || (TARGET_EPILOGUE_USING_MOVE
8553 	  && cfun->machine->use_fast_prologue_epilogue
8554 	  && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8555       || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) && frame.to_allocate)
8556       || (frame_pointer_needed && TARGET_USE_LEAVE
8557 	  && cfun->machine->use_fast_prologue_epilogue
8558 	  && (frame.nregs + frame.nsseregs) == 1)
8559       || crtl->calls_eh_return)
8561       /* Restore registers.  We can use ebp or esp to address the memory
8562 	 locations.  If both are available, default to ebp, since offsets
8563 	 are known to be small.  Only exception is esp pointing directly
8564 	 to the end of block of saved registers, where we may simplify
8567 	 If we are realigning stack with bp and sp, regs restore can't
8568 	 be addressed by bp. sp must be used instead.  */
8570       if (!frame_pointer_needed
8571 	  || (sp_valid && !frame.to_allocate)
8572 	  || stack_realign_fp)
8574 	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8575 						frame.to_allocate, style == 2);
8576 	  ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8578 					    + frame.nsseregs * 16
8579 					    + frame.padding0, style == 2);
8583 	  ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8584 						offset, style == 2);
8585 	  ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8587 					    + frame.nsseregs * 16
8588 					    + frame.padding0, style == 2);
8591       /* eh_return epilogues need %ecx added to the stack pointer.  */
8594 	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8596 	  /* Stack align doesn't work with eh_return.  */
8597 	  gcc_assert (!crtl->stack_realign_needed);
8599 	  if (frame_pointer_needed)
8601 	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8602 	      tmp = plus_constant (tmp, UNITS_PER_WORD);
8603 	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8605 	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8606 	      emit_move_insn (hard_frame_pointer_rtx, tmp);
8608 	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8613 	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8614 	      tmp = plus_constant (tmp, (frame.to_allocate
8615 					 + frame.nregs * UNITS_PER_WORD
8616 					 + frame.nsseregs * 16
8618 	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8621       else if (!frame_pointer_needed)
8622 	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8623 				   GEN_INT (frame.to_allocate
8624 					    + frame.nregs * UNITS_PER_WORD
8625 					    + frame.nsseregs * 16
8628       /* If not an i386, mov & pop is faster than "leave".  */
8629       else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8630 	       || !cfun->machine->use_fast_prologue_epilogue)
8631 	emit_insn ((*ix86_gen_leave) ());
8634 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
8635 				     hard_frame_pointer_rtx,
8638 	  emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8643       /* First step is to deallocate the stack frame so that we can
8646 	 If we realign stack with frame pointer, then stack pointer
8647          won't be able to recover via lea $offset(%bp), %sp, because
8648          there is a padding area between bp and sp for realign.
8649          "add $to_allocate, %sp" must be used instead.  */
8652 	  gcc_assert (frame_pointer_needed);
8653           gcc_assert (!stack_realign_fp);
8654 	  pro_epilogue_adjust_stack (stack_pointer_rtx,
8655 				     hard_frame_pointer_rtx,
8656 				     GEN_INT (offset), style);
8657           ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8658 						frame.to_allocate, style == 2);
8659 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8660 				     GEN_INT (frame.nsseregs * 16), style);
8662       else if (frame.to_allocate || frame.nsseregs)
8664 	  ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8667 	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8668 				     GEN_INT (frame.to_allocate
8669 					      + frame.nsseregs * 16
8670 					      + frame.padding0), style);
8673       for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8674 	if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8675 	  emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8676       if (frame_pointer_needed)
8678 	  /* Leave results in shorter dependency chains on CPUs that are
8679 	     able to grok it fast.  */
8680 	  if (TARGET_USE_LEAVE)
8681 	    emit_insn ((*ix86_gen_leave) ());
8684 	      /* When stack realignment really happens, recovering the stack
8685 		 pointer from the hard frame pointer is a must, if not using
8687 	      if (stack_realign_fp)
8688 		pro_epilogue_adjust_stack (stack_pointer_rtx,
8689 					   hard_frame_pointer_rtx,
8691 	      emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8696   if (crtl->drap_reg && crtl->stack_realign_needed)
8698       int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8699 			      ? 0 : UNITS_PER_WORD);
8700       gcc_assert (stack_realign_drap);
8701       emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8703 				   GEN_INT (-(UNITS_PER_WORD
8704 					      + param_ptr_offset))));
8705       if (!call_used_regs[REGNO (crtl->drap_reg)])
8706 	emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8710   /* Sibcall epilogues don't want a return instruction.  */
8714   if (crtl->args.pops_args && crtl->args.size)
8716       rtx popc = GEN_INT (crtl->args.pops_args);
8718       /* i386 can only pop 64K bytes.  If asked to pop more, pop
8719 	 return address, do explicit add, and jump indirectly to the
8722       if (crtl->args.pops_args >= 65536)
8724 	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
8726 	  /* There is no "pascal" calling convention in any 64bit ABI.  */
8727 	  gcc_assert (!TARGET_64BIT);
8729 	  emit_insn (gen_popsi1 (ecx));
8730 	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8731 	  emit_jump_insn (gen_return_indirect_internal (ecx));
8734 	emit_jump_insn (gen_return_pop_internal (popc));
8737     emit_jump_insn (gen_return_internal ());
8740 /* Reset from the function's potential modifications.  */
/* NOTE(review): incomplete listing -- the Mach-O conditional's head and loop
   structure are partially elided.  Restores the PIC register's real regno
   (the prologue may have switched it to an alternate register) and, on
   Mach-O, emits a trailing NOP when the function would otherwise end in a
   label.  */
8743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8744 			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8746   if (pic_offset_table_rtx)
8747     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8749   /* Mach-O doesn't support labels at the end of objects, so if
8750      it looks like we might want one, insert a NOP.  */
8752     rtx insn = get_last_insn ();
8755 	       && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8756       insn = PREV_INSN (insn);
8760 		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8761       fputs ("\tnop\n", file);
8767 /* Extract the parts of an RTL expression that is a valid memory address
8768    for an instruction.  Return 0 if the structure of the address is
8769    grossly off.  Return -1 if the address contains ASHIFT, so it is not
8770    strictly valid, but still used for computing length of lea instruction.  */
/* NOTE(review): incomplete listing -- many branches (the PLUS walk, several
   returns, the final OUT assignment) are elided.  Splits ADDR into
   base + index*scale + disp with an optional segment override, then
   canonicalizes (swap base/index, reg+reg for *2, etc.).  */
8773 ix86_decompose_address (rtx addr, struct ix86_address *out)
8775   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8776   rtx base_reg, index_reg;
8777   HOST_WIDE_INT scale = 1;
8778   rtx scale_rtx = NULL_RTX;
8780   enum ix86_address_seg seg = SEG_DEFAULT;
8782   if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8784   else if (GET_CODE (addr) == PLUS)
8794 	  addends[n++] = XEXP (op, 1);
8797       while (GET_CODE (op) == PLUS);
8802       for (i = n; i >= 0; --i)
8805 	  switch (GET_CODE (op))
8810 	      index = XEXP (op, 0);
8811 	      scale_rtx = XEXP (op, 1);
8815 	      if (XINT (op, 1) == UNSPEC_TP
8816 	          && TARGET_TLS_DIRECT_SEG_REFS
8817 	          && seg == SEG_DEFAULT)
8818 		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8847   else if (GET_CODE (addr) == MULT)
8849       index = XEXP (addr, 0);		/* index*scale */
8850       scale_rtx = XEXP (addr, 1);
8852   else if (GET_CODE (addr) == ASHIFT)
8856       /* We're called for lea too, which implements ashift on occasion.  */
8857       index = XEXP (addr, 0);
8858       tmp = XEXP (addr, 1);
8859       if (!CONST_INT_P (tmp))
8861       scale = INTVAL (tmp);
8862       if ((unsigned HOST_WIDE_INT) scale > 3)
8868     disp = addr;			/* displacement */
8870   /* Extract the integral value of scale.  */
8873       if (!CONST_INT_P (scale_rtx))
8875       scale = INTVAL (scale_rtx);
8878   base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8879   index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8881   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
8882   if (base_reg && index_reg && scale == 1
8883       && (index_reg == arg_pointer_rtx
8884 	  || index_reg == frame_pointer_rtx
8885 	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8888       tmp = base, base = index, index = tmp;
8889       tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8892   /* Special case: %ebp cannot be encoded as a base without a displacement.  */
8893   if ((base_reg == hard_frame_pointer_rtx
8894        || base_reg == frame_pointer_rtx
8895        || base_reg == arg_pointer_rtx) && !disp)
8898   /* Special case: on K6, [%esi] makes the instruction vector decoded.
8899      Avoid this by transforming to [%esi+0].
8900      Reload calls address legitimization without cfun defined, so we need
8901      to test cfun for being non-NULL.  */
8902   if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8903       && base_reg && !index_reg && !disp
8905       && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8908   /* Special case: encode reg+reg instead of reg*2.  */
8909   if (!base && index && scale && scale == 2)
8910     base = index, base_reg = index_reg, scale = 1;
8912   /* Special case: scaling cannot be encoded without base or displacement.  */
8913   if (!base && !disp && index && scale != 1)
8925 /* Return cost of the memory address x.
8926    For i386, it is better to use a complex address than let gcc copy
8927    the address into a reg and make a new pseudo.  But not if the address
8928    requires two regs - that would mean more pseudos with longer
/* NOTE(review): incomplete listing -- the cost accumulator, the assert on
   decomposition, and the return are elided.  Charges extra for addresses
   needing two hard registers and for AMD-K6 decode-unfriendly forms.  */
8931 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8933   struct ix86_address parts;
8935   int ok = ix86_decompose_address (x, &parts);
8939   if (parts.base && GET_CODE (parts.base) == SUBREG)
8940     parts.base = SUBREG_REG (parts.base);
8941   if (parts.index && GET_CODE (parts.index) == SUBREG)
8942     parts.index = SUBREG_REG (parts.index);
8944   /* Attempt to minimize number of registers in the address.  */
8946        && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
8948 	  && (!REG_P (parts.index)
8949 	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
8953       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
8955       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
8956       && parts.base != parts.index)
8959   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
8960      since its predecode logic can't detect the length of instructions
8961      and it degenerates to vector decoded.  Increase cost of such
8962      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
8963      to split such addresses or even refuse such addresses at all.
8965      Following addressing modes are affected:
8970      The first and last case  may be avoidable by explicitly coding the zero in
8971      memory address, but I don't have AMD-K6 machine handy to check this
8975       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
8976 	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
8977 	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
8983 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
8984    this is used to form addresses to local data when -fPIC is in
/* NOTE(review): return type line elided.  True iff DISP is the Mach-O
   pic-base-relative offset unspec.  */
8988 darwin_local_data_pic (rtx disp)
8990   return (GET_CODE (disp) == UNSPEC
8991 	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
8994 /* Determine if a given RTX is a valid constant.  We already know this
8995    satisfies CONSTANT_P.  */
/* NOTE(review): incomplete listing -- several case labels, returns and the
   switch/brace structure are elided; the visible lines show the CONST
   drill-down, the UNSPEC whitelist, TLS/DLLIMPORT rejection, and vector
   constant handling.  */
8998 legitimate_constant_p (rtx x)
9000   switch (GET_CODE (x))
9005       if (GET_CODE (x) == PLUS)
9007 	  if (!CONST_INT_P (XEXP (x, 1)))
9012       if (TARGET_MACHO && darwin_local_data_pic (x))
9015       /* Only some unspecs are valid as "constants".  */
9016       if (GET_CODE (x) == UNSPEC)
9017 	switch (XINT (x, 1))
9022 	    return TARGET_64BIT;
9025 	    x = XVECEXP (x, 0, 0);
9026 	    return (GET_CODE (x) == SYMBOL_REF
9027 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9029 	    x = XVECEXP (x, 0, 0);
9030 	    return (GET_CODE (x) == SYMBOL_REF
9031 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9036       /* We must have drilled down to a symbol.  */
9037       if (GET_CODE (x) == LABEL_REF)
9039       if (GET_CODE (x) != SYMBOL_REF)
9044       /* TLS symbols are never valid.  */
9045       if (SYMBOL_REF_TLS_MODEL (x))
9048       /* DLLIMPORT symbols are never valid.  */
9049       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9050 	  && SYMBOL_REF_DLLIMPORT_P (x))
9055       if (GET_MODE (x) == TImode
9056 	  && x != CONST0_RTX (TImode)
9062       if (!standard_sse_constant_p (x))
9069   /* Otherwise we handle everything else in the move patterns.  */
9073 /* Determine if it's legal to put X into the constant pool.  This
9074    is not possible for the address of thread-local symbols, which
9075    is checked above.  */
/* NOTE(review): the case labels for the "always OK" integral/vector
   constants are elided between the switch head and the default path.  */
9078 ix86_cannot_force_const_mem (rtx x)
9080   /* We can always put integral constants and vectors in memory.  */
9081   switch (GET_CODE (x))
9091   return !legitimate_constant_p (x);
9094 /* Determine if a given RTX is a valid constant address.  */
/* NOTE(review): return type line elided; non-strict address check.  */
9097 constant_address_p (rtx x)
9099   return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9102 /* Nonzero if the constant value X is a legitimate general operand
9103    when generating PIC code.  It is given that flag_pic is on and
9104    that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
/* NOTE(review): incomplete listing -- case labels, default path and final
   return are elided; symbol/label cases fall through to
   legitimate_pic_address_disp_p.  */
9107 legitimate_pic_operand_p (rtx x)
9111   switch (GET_CODE (x))
9114       inner = XEXP (x, 0);
9115       if (GET_CODE (inner) == PLUS
9116 	  && CONST_INT_P (XEXP (inner, 1)))
9117 	inner = XEXP (inner, 0);
9119       /* Only some unspecs are valid as "constants".  */
9120       if (GET_CODE (inner) == UNSPEC)
9121 	switch (XINT (inner, 1))
9126 	    return TARGET_64BIT;
9128 	    x = XVECEXP (inner, 0, 0);
9129 	    return (GET_CODE (x) == SYMBOL_REF
9130 		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9131 	  case UNSPEC_MACHOPIC_OFFSET:
9132 	    return legitimate_pic_address_disp_p (x);
9140       return legitimate_pic_address_disp_p (x);
9147 /* Determine if a given CONST RTX is a valid memory displacement
/* NOTE(review): incomplete listing -- the function head tail, several case
   labels, returns and the 64-bit/32-bit branch structure are elided.  The
   64-bit fast path accepts direct symbol/label (+ small offset) references
   to local, non-TLS symbols; otherwise only whitelisted GOT/TLS unspecs
   are valid.  */
9151 legitimate_pic_address_disp_p (rtx disp)
9155   /* In 64bit mode we can allow direct addresses of symbols and labels
9156      when they are not dynamic symbols.  */
9159       rtx op0 = disp, op1;
9161       switch (GET_CODE (disp))
9167 	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
9169 	  op0 = XEXP (XEXP (disp, 0), 0);
9170 	  op1 = XEXP (XEXP (disp, 0), 1);
9171 	  if (!CONST_INT_P (op1)
9172 	      || INTVAL (op1) >= 16*1024*1024
9173 	      || INTVAL (op1) < -16*1024*1024)
9175 	  if (GET_CODE (op0) == LABEL_REF)
9177 	  if (GET_CODE (op0) != SYMBOL_REF)
9182 	  /* TLS references should always be enclosed in UNSPEC.  */
9183 	  if (SYMBOL_REF_TLS_MODEL (op0))
9185 	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9186 	      && ix86_cmodel != CM_LARGE_PIC)
9194   if (GET_CODE (disp) != CONST)
9196   disp = XEXP (disp, 0);
9200       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
9201          of GOT tables.  We should not need these anyway.  */
9202       if (GET_CODE (disp) != UNSPEC
9203 	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
9204 	      && XINT (disp, 1) != UNSPEC_GOTOFF
9205 	      && XINT (disp, 1) != UNSPEC_PLTOFF))
9208       if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9209 	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9215   if (GET_CODE (disp) == PLUS)
9217       if (!CONST_INT_P (XEXP (disp, 1)))
9219       disp = XEXP (disp, 0);
9223   if (TARGET_MACHO && darwin_local_data_pic (disp))
9226   if (GET_CODE (disp) != UNSPEC)
9229   switch (XINT (disp, 1))
9234       /* We need to check for both symbols and labels because VxWorks loads
9235 	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9237       return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9238 	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9240       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9241 	 While ABI specify also 32bit relocation but we don't produce it in
9242 	 small PIC model at all.  */
9243       if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9244 	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9246 	return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9248     case UNSPEC_GOTTPOFF:
9249     case UNSPEC_GOTNTPOFF:
9250     case UNSPEC_INDNTPOFF:
9253       disp = XVECEXP (disp, 0, 0);
9254       return (GET_CODE (disp) == SYMBOL_REF
9255 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9257       disp = XVECEXP (disp, 0, 0);
9258       return (GET_CODE (disp) == SYMBOL_REF
9259 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9261       disp = XVECEXP (disp, 0, 0);
9262       return (GET_CODE (disp) == SYMBOL_REF
9263 	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
/* NOTE(review): elided extract -- embedded line numbers jump, so the
   error-reporting/exit paths between the visible checks are missing.
   Code kept byte-identical; comments only.  */
9269 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9270 memory address for an instruction. The MODE argument is the machine mode
9271 for the MEM expression that wants to use this address.
9273 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9274 convert common non-canonical forms to canonical form so that they will
9278 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9279 rtx addr, int strict)
9281 struct ix86_address parts;
9282 rtx base, index, disp;
9283 HOST_WIDE_INT scale;
/* REASON/REASON_RTX record why an address was rejected; presumably
   used by elided debug-dump code on the failure path.  */
9284 const char *reason = NULL;
9285 rtx reason_rtx = NULL_RTX;
/* Split ADDR into base + index*scale + disp; reject if the
   decomposition itself fails.  */
9287 if (ix86_decompose_address (addr, &parts) <= 0)
9289 reason = "decomposition failed";
9294 index = parts.index;
9296 scale = parts.scale;
9298 /* Validate base register.
9300 Don't allow SUBREG's that span more than a word here. It can lead to spill
9301 failures when the base is one word out of a two word structure, which is
9302 represented internally as a DImode int. */
9311 else if (GET_CODE (base) == SUBREG
9312 && REG_P (SUBREG_REG (base))
9313 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9315 reg = SUBREG_REG (base);
9318 reason = "base is not a register";
9322 if (GET_MODE (base) != Pmode)
9324 reason = "base is not in Pmode";
/* STRICT selects hard-register validation (after reload); the
   non-strict forms also accept pseudos.  */
9328 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9329 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9331 reason = "base is not valid";
9336 /* Validate index register.
9338 Don't allow SUBREG's that span more than a word here -- same as above. */
9347 else if (GET_CODE (index) == SUBREG
9348 && REG_P (SUBREG_REG (index))
9349 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9351 reg = SUBREG_REG (index);
9354 reason = "index is not a register";
9358 if (GET_MODE (index) != Pmode)
9360 reason = "index is not in Pmode";
9364 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9365 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9367 reason = "index is not valid";
9372 /* Validate scale factor. */
9375 reason_rtx = GEN_INT (scale);
9378 reason = "scale without index";
/* x86 addressing only supports scale 1, 2, 4, 8 (the scale==1 case
   is presumably filtered before this visible check).  */
9382 if (scale != 2 && scale != 4 && scale != 8)
9384 reason = "scale is not a valid multiplier";
9389 /* Validate displacement. */
9394 if (GET_CODE (disp) == CONST
9395 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9396 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9397 switch (XINT (XEXP (disp, 0), 1))
9399 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9400 used. While ABI specify also 32bit relocations, we don't produce
9401 them at all and use IP relative instead. */
9404 gcc_assert (flag_pic);
9406 goto is_legitimate_pic;
9407 reason = "64bit address unspec";
9410 case UNSPEC_GOTPCREL:
9411 gcc_assert (flag_pic);
9412 goto is_legitimate_pic;
9414 case UNSPEC_GOTTPOFF:
9415 case UNSPEC_GOTNTPOFF:
9416 case UNSPEC_INDNTPOFF:
9422 reason = "invalid address unspec";
9426 else if (SYMBOLIC_CONST (disp)
9430 && MACHOPIC_INDIRECT
9431 && !machopic_operand_p (disp)
9437 if (TARGET_64BIT && (index || base))
9439 /* foo@dtpoff(%rX) is ok. */
9440 if (GET_CODE (disp) != CONST
9441 || GET_CODE (XEXP (disp, 0)) != PLUS
9442 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9443 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9444 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9445 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9447 reason = "non-constant pic memory reference";
9451 else if (! legitimate_pic_address_disp_p (disp))
9453 reason = "displacement is an invalid pic construct";
9457 /* This code used to verify that a symbolic pic displacement
9458 includes the pic_offset_table_rtx register.
9460 While this is good idea, unfortunately these constructs may
9461 be created by "adds using lea" optimization for incorrect
9470 This code is nonsensical, but results in addressing
9471 GOT table with pic_offset_table_rtx base. We can't
9472 just refuse it easily, since it gets matched by
9473 "addsi3" pattern, that later gets split to lea in the
9474 case output register differs from input. While this
9475 can be handled by separate addsi pattern for this case
9476 that never results in lea, this seems to be easier and
9477 correct fix for crash to disable this test. */
/* Non-PIC path: the displacement must be a constant the hardware
   can encode.  */
9479 else if (GET_CODE (disp) != LABEL_REF
9480 && !CONST_INT_P (disp)
9481 && (GET_CODE (disp) != CONST
9482 || !legitimate_constant_p (disp))
9483 && (GET_CODE (disp) != SYMBOL_REF
9484 || !legitimate_constant_p (disp)))
9486 reason = "displacement is not constant";
/* 64-bit displacements are limited to sign-extended 32 bits.  */
9489 else if (TARGET_64BIT
9490 && !x86_64_immediate_operand (disp, VOIDmode))
9492 reason = "displacement is out of range";
9497 /* Everything looks valid. */
9504 /* Return a unique alias set for the GOT. */
9506 static alias_set_type
9507 ix86_GOT_alias_set (void)
/* Lazily-initialized singleton; the guard that checks for -1 before
   allocating (and the return) is elided from this extract.  */
9509 static alias_set_type set = -1;
9511 set = new_alias_set ();
/* NOTE(review): elided extract -- embedded line numbers jump, so some
   guards and returns between the visible lines are missing.  Code kept
   byte-identical; comments only.  */
9515 /* Return a legitimate reference for ORIG (an address) using the
9516 register REG. If REG is 0, a new pseudo is generated.
9518 There are two types of references that must be handled:
9520 1. Global data references must load the address from the GOT, via
9521 the PIC reg. An insn is emitted to do this load, and the reg is
9524 2. Static data references, constant pool addresses, and code labels
9525 compute the address as an offset from the GOT, whose base is in
9526 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9527 differentiate them from global data objects. The returned
9528 address is the PIC reg + an unspec constant.
9530 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9531 reg also appears in the address. */
9534 legitimize_pic_address (rtx orig, rtx reg)
9541 if (TARGET_MACHO && !TARGET_64BIT)
9544 reg = gen_reg_rtx (Pmode);
9545 /* Use the generic Mach-O PIC machinery. */
9546 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* Already-legitimate 64-bit displacements need no work.  */
9550 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9552 else if (TARGET_64BIT
9553 && ix86_cmodel != CM_SMALL_PIC
9554 && gotoff_operand (addr, Pmode))
9557 /* This symbol may be referenced via a displacement from the PIC
9558 base address (@GOTOFF). */
/* During reload we cannot allocate pseudos, so just mark the PIC
   register live.  */
9560 if (reload_in_progress)
9561 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9562 if (GET_CODE (addr) == CONST)
9563 addr = XEXP (addr, 0);
9564 if (GET_CODE (addr) == PLUS)
9566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9568 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9571 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9572 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9574 tmpreg = gen_reg_rtx (Pmode);
9577 emit_move_insn (tmpreg, new_rtx);
/* Add the PIC base; reuse REG as the destination when possible.  */
9581 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9582 tmpreg, 1, OPTAB_DIRECT);
9585 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9587 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9589 /* This symbol may be referenced via a displacement from the PIC
9590 base address (@GOTOFF). */
9592 if (reload_in_progress)
9593 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9594 if (GET_CODE (addr) == CONST)
9595 addr = XEXP (addr, 0);
9596 if (GET_CODE (addr) == PLUS)
9598 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9600 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9603 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9604 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9605 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9609 emit_move_insn (reg, new_rtx);
/* Global (non-TLS) symbols: load the address from the GOT.  */
9613 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9614 /* We can't use @GOTOFF for text labels on VxWorks;
9615 see gotoff_operand. */
9616 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9618 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9620 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9621 return legitimize_dllimport_symbol (addr, true);
9622 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9624 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9626 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9627 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative GOT load (@GOTPCREL).  */
9631 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9633 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9634 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9635 new_rtx = gen_const_mem (Pmode, new_rtx);
9636 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9639 reg = gen_reg_rtx (Pmode);
9640 /* Use directly gen_movsi, otherwise the address is loaded
9641 into register for CSE. We don't want to CSE this addresses,
9642 instead we CSE addresses from the GOT table, so skip this. */
9643 emit_insn (gen_movsi (reg, new_rtx));
9648 /* This symbol must be referenced via a load from the
9649 Global Offset Table (@GOT). */
9651 if (reload_in_progress)
9652 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9653 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9654 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9656 new_rtx = force_reg (Pmode, new_rtx);
9657 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9658 new_rtx = gen_const_mem (Pmode, new_rtx);
9659 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9662 reg = gen_reg_rtx (Pmode);
9663 emit_move_insn (reg, new_rtx);
/* Fallback: non-symbolic or already-legitimate constants.  */
9669 if (CONST_INT_P (addr)
9670 && !x86_64_immediate_operand (addr, VOIDmode))
9674 emit_move_insn (reg, addr);
9678 new_rtx = force_reg (Pmode, addr);
9680 else if (GET_CODE (addr) == CONST)
9682 addr = XEXP (addr, 0);
9684 /* We must match stuff we generate before. Assume the only
9685 unspecs that can get here are ours. Not that we could do
9686 anything with them anyway.... */
9687 if (GET_CODE (addr) == UNSPEC
9688 || (GET_CODE (addr) == PLUS
9689 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9691 gcc_assert (GET_CODE (addr) == PLUS);
9693 if (GET_CODE (addr) == PLUS)
9695 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9697 /* Check first to see if this is a constant offset from a @GOTOFF
9698 symbol reference. */
9699 if (gotoff_operand (op0, Pmode)
9700 && CONST_INT_P (op1))
9704 if (reload_in_progress)
9705 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9706 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9708 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9709 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9710 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9714 emit_move_insn (reg, new_rtx);
/* Offsets outside +/-16MB (presumably the 64-bit branch; its guard
   is elided) must go through registers.  */
9720 if (INTVAL (op1) < -16*1024*1024
9721 || INTVAL (op1) >= 16*1024*1024)
9723 if (!x86_64_immediate_operand (op1, Pmode))
9724 op1 = force_reg (Pmode, op1);
9725 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize each side recursively and recombine.  */
9731 base = legitimize_pic_address (XEXP (addr, 0), reg);
9732 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9733 base == reg ? NULL_RTX : reg);
9735 if (CONST_INT_P (new_rtx))
9736 new_rtx = plus_constant (base, INTVAL (new_rtx));
9739 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9741 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9742 new_rtx = XEXP (new_rtx, 1);
9744 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9752 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9755 get_thread_pointer (int to_reg)
/* The thread pointer is represented as an UNSPEC_TP wrapping const0.  */
9759 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* TO_REG path (guard elided in this extract): emit a SET copying the
   thread pointer into a fresh pseudo.  */
9763 reg = gen_reg_rtx (Pmode);
9764 insn = gen_rtx_SET (VOIDmode, reg, tp);
9765 insn = emit_insn (insn);
/* NOTE(review): elided extract -- embedded line numbers jump; several
   guards, case labels, and returns are missing.  Code kept
   byte-identical; comments only.  */
9770 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9771 false if we expect this to be used for a memory address and true if
9772 we expect to load the address into a register. */
9775 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9777 rtx dest, base, off, pic, tp;
9782 case TLS_MODEL_GLOBAL_DYNAMIC:
9783 dest = gen_reg_rtx (Pmode);
9784 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic (non-GNU2) 64-bit GD: the __tls_get_addr call returns in
   %rax; wrap the sequence as a const libcall block.  */
9786 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9788 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9791 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9792 insns = get_insns ();
9795 RTL_CONST_CALL_P (insns) = 1;
9796 emit_libcall_block (insns, dest, rax, x);
9798 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9799 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9801 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 TLS descriptors return an offset; add the thread pointer and
   record a REG_EQUIV so the result can be rematerialized.  */
9803 if (TARGET_GNU2_TLS)
9805 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9807 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9811 case TLS_MODEL_LOCAL_DYNAMIC:
9812 base = gen_reg_rtx (Pmode);
9813 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9815 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9817 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9820 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9821 insns = get_insns ();
9824 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9825 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9826 RTL_CONST_CALL_P (insns) = 1;
9827 emit_libcall_block (insns, base, rax, note);
9829 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9830 emit_insn (gen_tls_local_dynamic_base_64 (base));
9832 emit_insn (gen_tls_local_dynamic_base_32 (base));
9834 if (TARGET_GNU2_TLS)
9836 rtx x = ix86_tls_module_base ();
9838 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9839 gen_rtx_MINUS (Pmode, x, tp));
/* LD: variable address = module base + @DTPOFF offset.  */
9842 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9843 off = gen_rtx_CONST (Pmode, off);
9845 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9847 if (TARGET_GNU2_TLS)
9849 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9851 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9856 case TLS_MODEL_INITIAL_EXEC:
/* Select which GOT-relative TLS unspec to use; the branch guards
   (64-bit vs PIC vs non-PIC) are partially elided here.  */
9860 type = UNSPEC_GOTNTPOFF;
9864 if (reload_in_progress)
9865 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9866 pic = pic_offset_table_rtx;
9867 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9869 else if (!TARGET_ANY_GNU_TLS)
9871 pic = gen_reg_rtx (Pmode);
9872 emit_insn (gen_set_got (pic));
9873 type = UNSPEC_GOTTPOFF;
9878 type = UNSPEC_INDNTPOFF;
/* Load the TP offset from the GOT entry.  */
9881 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9882 off = gen_rtx_CONST (Pmode, off);
9884 off = gen_rtx_PLUS (Pmode, pic, off);
9885 off = gen_const_mem (Pmode, off);
9886 set_mem_alias_set (off, ix86_GOT_alias_set ());
9888 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9890 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9891 off = force_reg (Pmode, off);
9892 return gen_rtx_PLUS (Pmode, base, off);
/* Legacy (Sun TLS style): address = thread pointer - offset.  */
9896 base = get_thread_pointer (true);
9897 dest = gen_reg_rtx (Pmode);
9898 emit_insn (gen_subsi3 (dest, base, off));
9902 case TLS_MODEL_LOCAL_EXEC:
9903 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9904 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9905 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9906 off = gen_rtx_CONST (Pmode, off);
9908 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9910 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9911 return gen_rtx_PLUS (Pmode, base, off);
9915 base = get_thread_pointer (true);
9916 dest = gen_reg_rtx (Pmode);
9917 emit_insn (gen_subsi3 (dest, base, off));
9928 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* DECL -> VAR_DECL map, GC-managed, created lazily below.  */
9931 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
9932 htab_t dllimport_map;
/* Look up (or build and cache) the artificial "__imp_" pointer
   variable for DECL.  NOTE(review): elided extract -- the early
   "found in table" return path is missing here.  */
9935 get_dllimport_decl (tree decl)
9937 struct tree_map *h, in;
9941 size_t namelen, prefixlen;
9947 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
9949 in.hash = htab_hash_pointer (decl);
9950 in.base.from = decl;
9951 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
9952 h = (struct tree_map *) *loc;
/* Not cached: build a fresh external, read-only pointer VAR_DECL.  */
9956 *loc = h = GGC_NEW (struct tree_map);
9958 h->base.from = decl;
9959 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
9960 DECL_ARTIFICIAL (to) = 1;
9961 DECL_IGNORED_P (to) = 1;
9962 DECL_EXTERNAL (to) = 1;
9963 TREE_READONLY (to) = 1;
/* Compose the "__imp_" / "__imp__" assembler name; the leading '*'
   suppresses further user-label prefixing.  */
9965 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
9966 name = targetm.strip_name_encoding (name);
9967 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
9968 ? "*__imp_" : "*__imp__";
9969 namelen = strlen (name);
9970 prefixlen = strlen (prefix);
9971 imp_name = (char *) alloca (namelen + prefixlen + 1);
9972 memcpy (imp_name, prefix, prefixlen);
9973 memcpy (imp_name + prefixlen, name, namelen + 1);
/* Its RTL is a load through the import-table slot, sharing the GOT
   alias set.  */
9975 name = ggc_alloc_string (imp_name, namelen + prefixlen);
9976 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
9977 SET_SYMBOL_REF_DECL (rtl, to);
9978 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
9980 rtl = gen_const_mem (Pmode, rtl);
9981 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
9983 SET_DECL_RTL (to, rtl);
9984 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
9989 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
9990 true if we require the result be a register. */
9993 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* The symbol must carry its decl so the __imp_ variable can be built.  */
9998 gcc_assert (SYMBOL_REF_DECL (symbol));
9999 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10001 x = DECL_RTL (imp_decl);
/* WANT_REG path (guard elided in this extract).  */
10003 x = force_reg (Pmode, x);
/* NOTE(review): elided extract -- embedded line numbers jump; guards,
   `changed = 1` updates, and return statements between the visible
   lines are missing.  Code kept byte-identical; comments only.  */
10007 /* Try machine-dependent ways of modifying an illegitimate address
10008 to be legitimate. If we find one, return the new, valid address.
10009 This macro is used in only one place: `memory_address' in explow.c.
10011 OLDX is the address as it was before break_out_memory_refs was called.
10012 In some cases it is useful to look at this to decide what needs to be done.
10014 MODE and WIN are passed so that this macro can use
10015 GO_IF_LEGITIMATE_ADDRESS.
10017 It is always safe for this macro to do nothing. It exists to recognize
10018 opportunities to optimize the output.
10020 For the 80386, we handle X+REG by loading X into a register R and
10021 using R+REG. R will go in a general reg and indexing will be used.
10022 However, if REG is a broken-out memory address or multiplication,
10023 nothing needs to be done because REG can certainly go in a general reg.
10025 When -fpic is used, special handling is needed for symbolic references.
10026 See comments by legitimize_pic_address in i386.c for details. */
10029 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols are handled first, for both bare SYMBOL_REFs and
   CONST(PLUS(symbol, offset)) forms.  */
10034 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10036 return legitimize_tls_address (x, (enum tls_model) log, false);
10037 if (GET_CODE (x) == CONST
10038 && GET_CODE (XEXP (x, 0)) == PLUS
10039 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10040 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10042 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10043 (enum tls_model) log, false);
10044 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols are redirected through their __imp_ slots.  */
10047 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10049 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10050 return legitimize_dllimport_symbol (x, true);
10051 if (GET_CODE (x) == CONST
10052 && GET_CODE (XEXP (x, 0)) == PLUS
10053 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10054 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10056 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10057 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10061 if (flag_pic && SYMBOLIC_CONST (x))
10062 return legitimize_pic_address (x, 0);
10064 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10065 if (GET_CODE (x) == ASHIFT
10066 && CONST_INT_P (XEXP (x, 1))
10067 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10070 log = INTVAL (XEXP (x, 1));
10071 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10072 GEN_INT (1 << log));
10075 if (GET_CODE (x) == PLUS)
10077 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10079 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10080 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10081 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10084 log = INTVAL (XEXP (XEXP (x, 0), 1));
10085 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10086 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10087 GEN_INT (1 << log));
10090 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10091 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10092 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10095 log = INTVAL (XEXP (XEXP (x, 1), 1));
10096 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10097 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10098 GEN_INT (1 << log));
10101 /* Put multiply first if it isn't already. */
10102 if (GET_CODE (XEXP (x, 1)) == MULT)
10104 rtx tmp = XEXP (x, 0);
10105 XEXP (x, 0) = XEXP (x, 1);
10110 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10111 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10112 created by virtual register instantiation, register elimination, and
10113 similar optimizations. */
10114 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10117 x = gen_rtx_PLUS (Pmode,
10118 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10119 XEXP (XEXP (x, 1), 0)),
10120 XEXP (XEXP (x, 1), 1));
10124 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10125 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10126 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10127 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10128 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10129 && CONSTANT_P (XEXP (x, 1)))
10132 rtx other = NULL_RTX;
/* Fold the two constants together, whichever side holds the
   CONST_INT.  */
10134 if (CONST_INT_P (XEXP (x, 1)))
10136 constant = XEXP (x, 1);
10137 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10139 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10141 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10142 other = XEXP (x, 1);
10150 x = gen_rtx_PLUS (Pmode,
10151 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10152 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10153 plus_constant (other, INTVAL (constant)));
/* After canonicalization, return as soon as the result is valid.  */
10157 if (changed && legitimate_address_p (mode, x, FALSE))
10160 if (GET_CODE (XEXP (x, 0)) == MULT)
10163 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10166 if (GET_CODE (XEXP (x, 1)) == MULT)
10169 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10173 && REG_P (XEXP (x, 1))
10174 && REG_P (XEXP (x, 0)))
10177 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10180 x = legitimize_pic_address (x, 0);
10183 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
10186 if (REG_P (XEXP (x, 0)))
10188 rtx temp = gen_reg_rtx (Pmode);
10189 rtx val = force_operand (XEXP (x, 1), temp);
10191 emit_move_insn (temp, val);
10193 XEXP (x, 1) = temp;
10197 else if (REG_P (XEXP (x, 1)))
10199 rtx temp = gen_reg_rtx (Pmode);
10200 rtx val = force_operand (XEXP (x, 0), temp);
10202 emit_move_insn (temp, val);
10204 XEXP (x, 0) = temp;
/* NOTE(review): elided extract -- embedded line numbers jump; several
   case labels and break statements are missing.  Code kept
   byte-identical; comments only.  */
10212 /* Print an integer constant expression in assembler syntax. Addition
10213 and subtraction are the only arithmetic that may appear in these
10214 expressions. FILE is the stdio stream to write to, X is the rtx, and
10215 CODE is the operand print code from the output string. */
10218 output_pic_addr_const (FILE *file, rtx x, int code)
10222 switch (GET_CODE (x))
10225 gcc_assert (flag_pic);
10230 if (! TARGET_MACHO || TARGET_64BIT)
10231 output_addr_const (file, x);
10234 const char *name = XSTR (x, 0);
10236 /* Mark the decl as referenced so that cgraph will
10237 output the function. */
10238 if (SYMBOL_REF_DECL (x))
10239 mark_decl_referenced (SYMBOL_REF_DECL (x));
10242 if (MACHOPIC_INDIRECT
10243 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10244 name = machopic_indirection_name (x, /*stub_p=*/true);
10246 assemble_name (file, name);
/* 'P' print code requests a PLT reference for non-local symbols.  */
10248 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10249 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10250 fputs ("@PLT", file);
10257 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10258 assemble_name (asm_out_file, buf);
10262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10266 /* This used to output parentheses around the expression,
10267 but that does not work on the 386 (either ATT or BSD assembler). */
10268 output_pic_addr_const (file, XEXP (x, 0), code);
10272 if (GET_MODE (x) == VOIDmode)
10274 /* We can use %d if the number is <32 bits and positive. */
10275 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10276 fprintf (file, "0x%lx%08lx",
10277 (unsigned long) CONST_DOUBLE_HIGH (x),
10278 (unsigned long) CONST_DOUBLE_LOW (x));
10280 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10283 /* We can't handle floating point constants;
10284 PRINT_OPERAND must handle them. */
10285 output_operand_lossage ("floating constant misused");
10289 /* Some assemblers need integer constants to appear first. */
10290 if (CONST_INT_P (XEXP (x, 0)))
10292 output_pic_addr_const (file, XEXP (x, 0), code);
10294 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS (presumably -- the case label is elided): only
   constant - expression forms are supported.  */
10298 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10299 output_pic_addr_const (file, XEXP (x, 1), code);
10301 output_pic_addr_const (file, XEXP (x, 0), code);
10307 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10308 output_pic_addr_const (file, XEXP (x, 0), code);
10310 output_pic_addr_const (file, XEXP (x, 1), code);
10312 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand followed by the matching
   relocation suffix.  */
10316 gcc_assert (XVECLEN (x, 0) == 1);
10317 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10318 switch (XINT (x, 1))
10321 fputs ("@GOT", file);
10323 case UNSPEC_GOTOFF:
10324 fputs ("@GOTOFF", file);
10326 case UNSPEC_PLTOFF:
10327 fputs ("@PLTOFF", file);
10329 case UNSPEC_GOTPCREL:
10330 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10331 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10333 case UNSPEC_GOTTPOFF:
10334 /* FIXME: This might be @TPOFF in Sun ld too. */
10335 fputs ("@GOTTPOFF", file);
10338 fputs ("@TPOFF", file);
10340 case UNSPEC_NTPOFF:
10342 fputs ("@TPOFF", file);
10344 fputs ("@NTPOFF", file);
10346 case UNSPEC_DTPOFF:
10347 fputs ("@DTPOFF", file);
10349 case UNSPEC_GOTNTPOFF:
10351 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10352 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10354 fputs ("@GOTNTPOFF", file);
10356 case UNSPEC_INDNTPOFF:
10357 fputs ("@INDNTPOFF", file);
10360 case UNSPEC_MACHOPIC_OFFSET:
10362 machopic_output_function_base_name (file);
10366 output_operand_lossage ("invalid UNSPEC as operand");
10372 output_operand_lossage ("invalid expression as operand");
10376 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10377 We need to emit DTP-relative relocations. */
10379 static void ATTRIBUTE_UNUSED
10380 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
/* Emit X with a @DTPOFF relocation.  NOTE(review): the switch on
   SIZE is elided here; the ", 0" line presumably pads an 8-byte
   entry and gcc_unreachable handles unsupported sizes.  */
10382 fputs (ASM_LONG, file);
10383 output_addr_const (file, x);
10384 fputs ("@DTPOFF", file);
10390 fputs (", 0", file);
10393 gcc_unreachable ();
10397 /* Return true if X is a representation of the PIC register. This copes
10398 with calls from ix86_find_base_term, where the register might have
10399 been replaced by a cselib value. */
10402 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands for the PIC register if it compares equal to
   pic_offset_table_rtx under cselib equivalence.  */
10404 if (GET_CODE (x) == VALUE)
10405 return (pic_offset_table_rtx
10406 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10408 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
/* NOTE(review): elided extract -- embedded line numbers jump; the
   initial assignment of X from ORIG_X and some early-return guards are
   missing.  Code kept byte-identical; comments only.  */
10411 /* In the name of slightly smaller debug output, and to cater to
10412 general assembler lossage, recognize PIC+GOTOFF and turn it back
10413 into a direct symbol reference.
10415 On Darwin, this is necessary to avoid a crash, because Darwin
10416 has a different PIC label for each routine but the DWARF debugging
10417 information is not associated with any particular routine, so it's
10418 necessary to remove references to the PIC label from RTL stored by
10419 the DWARF output code. */
10422 ix86_delegitimize_address (rtx orig_x)
10425 /* reg_addend is NULL or a multiple of some register. */
10426 rtx reg_addend = NULL_RTX;
10427 /* const_addend is NULL or a const_int. */
10428 rtx const_addend = NULL_RTX;
10429 /* This is the result, or NULL. */
10430 rtx result = NULL_RTX;
/* 64-bit: a MEM of CONST(UNSPEC_GOTPCREL(sym)) delegitimizes to the
   bare symbol.  */
10437 if (GET_CODE (x) != CONST
10438 || GET_CODE (XEXP (x, 0)) != UNSPEC
10439 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10440 || !MEM_P (orig_x))
10442 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit: expect PIC-reg [+ index*scale] + CONST(...).  */
10445 if (GET_CODE (x) != PLUS
10446 || GET_CODE (XEXP (x, 1)) != CONST)
10449 if (ix86_pic_register_p (XEXP (x, 0)))
10450 /* %ebx + GOT/GOTOFF */
10452 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10454 /* %ebx + %reg * scale + GOT/GOTOFF */
10455 reg_addend = XEXP (x, 0);
10456 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10457 reg_addend = XEXP (reg_addend, 1);
10458 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10459 reg_addend = XEXP (reg_addend, 0);
10462 if (!REG_P (reg_addend)
10463 && GET_CODE (reg_addend) != MULT
10464 && GET_CODE (reg_addend) != ASHIFT)
/* Strip a constant offset inside the CONST, if present.  */
10470 x = XEXP (XEXP (x, 1), 0);
10471 if (GET_CODE (x) == PLUS
10472 && CONST_INT_P (XEXP (x, 1)))
10474 const_addend = XEXP (x, 1);
/* @GOT is only meaningful through a MEM; @GOTOFF only outside one.  */
10478 if (GET_CODE (x) == UNSPEC
10479 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10480 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10481 result = XVECEXP (x, 0, 0);
10483 if (TARGET_MACHO && darwin_local_data_pic (x)
10484 && !MEM_P (orig_x))
10485 result = XVECEXP (x, 0, 0);
/* Re-attach the stripped constant and register addends.  */
10491 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10493 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10497 /* If X is a machine specific address (i.e. a symbol or label being
10498 referenced as a displacement from the GOT implemented using an
10499 UNSPEC), then return the base term. Otherwise return X. */
10502 ix86_find_base_term (rtx x)
/* 64-bit fast path (guard elided in this extract): unwrap
   CONST(UNSPEC_GOTPCREL(sym) [+ offset]) to the bare symbol.  */
10508 if (GET_CODE (x) != CONST)
10510 term = XEXP (x, 0);
10511 if (GET_CODE (term) == PLUS
10512 && (CONST_INT_P (XEXP (term, 1))
10513 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10514 term = XEXP (term, 0);
10515 if (GET_CODE (term) != UNSPEC
10516 || XINT (term, 1) != UNSPEC_GOTPCREL)
10519 return XVECEXP (term, 0, 0);
/* Otherwise fall back to full delegitimization.  */
10522 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "e", "a", "ae", "p") for CODE
   in mode MODE to FILE.  REVERSE inverts the condition; FP selects the
   fcmov-style spellings.  NOTE(review): elided extract -- most case
   labels of the big switch are missing (embedded line numbers jump
   from 10540 to 10591); code kept byte-identical, comments only.  */
10526 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10527 int fp, FILE *file)
10529 const char *suffix;
/* FP compares: collapse to the equivalent integer condition first.  */
10531 if (mode == CCFPmode || mode == CCFPUmode)
10533 enum rtx_code second_code, bypass_code;
10534 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10535 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10536 code = ix86_fp_compare_code_to_integer (code);
10540 code = reverse_condition (code);
10591 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10595 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10596 Those same assemblers have the same but opposite lossage on cmov. */
10597 if (mode == CCmode)
10598 suffix = fp ? "nbe" : "a";
10599 else if (mode == CCCmode)
10602 gcc_unreachable ();
10618 gcc_unreachable ();
10622 gcc_assert (mode == CCmode || mode == CCCmode);
10639 gcc_unreachable ();
10643 /* ??? As above. */
10644 gcc_assert (mode == CCmode || mode == CCCmode);
10645 suffix = fp ? "nb" : "ae";
10648 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10652 /* ??? As above. */
10653 if (mode == CCmode)
10655 else if (mode == CCCmode)
10656 suffix = fp ? "nb" : "ae";
10658 gcc_unreachable ();
/* Unordered / ordered (parity-flag) conditions.  */
10661 suffix = fp ? "u" : "p";
10664 suffix = fp ? "nu" : "np";
10667 gcc_unreachable ();
10669 fputs (suffix, file);
10672 /* Print the name of register X to FILE based on its machine mode and number.
10673 If CODE is 'w', pretend the mode is HImode.
10674 If CODE is 'b', pretend the mode is QImode.
10675 If CODE is 'k', pretend the mode is SImode.
10676 If CODE is 'q', pretend the mode is DImode.
10677 If CODE is 'x', pretend the mode is V4SFmode.
10678 If CODE is 't', pretend the mode is V8SFmode.
10679 If CODE is 'h', pretend the reg is the 'high' byte register.
10680 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10681 If CODE is 'd', duplicate the operand for AVX instruction.
10685 print_reg (rtx x, int code, FILE *file)
10688 bool duplicated = code == 'd' && TARGET_AVX;
/* Soft-frame and flag/status registers must never reach the assembler
   output; only pc_rtx and real hard registers are valid here.  */
10690 gcc_assert (x == pc_rtx
10691 || (REGNO (x) != ARG_POINTER_REGNUM
10692 && REGNO (x) != FRAME_POINTER_REGNUM
10693 && REGNO (x) != FLAGS_REG
10694 && REGNO (x) != FPSR_REG
10695 && REGNO (x) != FPCR_REG));
10697 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip" -- only meaningful in 64-bit mode.  */
10702 gcc_assert (TARGET_64BIT);
10703 fputs ("rip", file);
/* Translate the override CODE into an effective operand size
   (NOTE(review): the size values assigned in each arm are elided
   from this excerpted listing).  */
10707 if (code == 'w' || MMX_REG_P (x))
10709 else if (code == 'b')
10711 else if (code == 'k')
10713 else if (code == 'q')
10715 else if (code == 'y')
10717 else if (code == 'h')
10719 else if (code == 'x')
10721 else if (code == 't')
10724 code = GET_MODE_SIZE (GET_MODE (x));
10726 /* Irritatingly, AMD extended registers use different naming convention
10727 from the normal registers. */
10728 if (REX_INT_REG_P (x))
10730 gcc_assert (TARGET_64BIT);
/* r8..r15 have no AH/BH-style high-byte halves.  */
10734 error ("extended registers have no high halves");
10737 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10740 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10743 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10746 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10749 error ("unsupported operand size for extended register");
10759 if (STACK_TOP_P (x))
/* 8-byte integer regs get an 'r' prefix in 64-bit mode, 'e' otherwise
   (rax vs eax); FP/SSE register names are used as-is.  */
10768 if (! ANY_FP_REG_P (x))
10769 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10774 reg = hi_reg_name[REGNO (x)];
10777 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10779 reg = qi_reg_name[REGNO (x)];
10782 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10784 reg = qi_high_reg_name[REGNO (x)];
10789 gcc_assert (!duplicated);
/* Skip the leading size letter of the canonical name table entry.  */
10791 fputs (hi_reg_name[REGNO (x)] + 1, file);
10796 gcc_unreachable ();
/* AVX 'd' duplication: emit the register a second time, with the
   AT&T '%' prefix when needed.  */
10802 if (ASSEMBLER_DIALECT == ASM_ATT)
10803 fprintf (file, ", %%%s", reg);
10805 fprintf (file, ", %s", reg);
10809 /* Locate some local-dynamic symbol still in use by this function
10810 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: when *PX is a SYMBOL_REF with local-dynamic
   TLS model, record its name in cfun->machine->some_ld_name.
   NOTE(review): the return statements are elided in this excerpt;
   presumably it returns nonzero to stop the walk once found.  */
10814 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10818 if (GET_CODE (x) == SYMBOL_REF
10819 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10821 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return the cached local-dynamic TLS symbol name for the current
   function, scanning the insn stream on first use.  Aborts if the
   function contains no such symbol -- callers only ask when the
   '&' operand code guarantees one exists.  */
10828 static const char *
10829 get_some_local_dynamic_name (void)
10833 if (cfun->machine->some_ld_name)
10834 return cfun->machine->some_ld_name;
10836 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10838 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10839 return cfun->machine->some_ld_name;
10841 gcc_unreachable ();
10844 /* Meaning of CODE:
10845 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10846 C -- print opcode suffix for set/cmov insn.
10847 c -- like C, but print reversed condition
10848 E,e -- likewise, but for compare-and-branch fused insn.
10849 F,f -- likewise, but for floating-point.
10850 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10852 R -- print the prefix for register names.
10853 z -- print the opcode suffix for the size of the current operand.
10854 Z -- likewise, with special suffixes for fild/fist instructions.
10855 * -- print a star (in certain assembler syntax)
10856 A -- print an absolute memory reference.
10857 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10858 s -- print a shift double count, followed by the assemblers argument
10860 b -- print the QImode name of the register for the indicated operand.
10861 %b0 would print %al if operands[0] is reg 0.
10862 w -- likewise, print the HImode name of the register.
10863 k -- likewise, print the SImode name of the register.
10864 q -- likewise, print the DImode name of the register.
10865 x -- likewise, print the V4SFmode name of the register.
10866 t -- likewise, print the V8SFmode name of the register.
10867 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10868 y -- print "st(0)" instead of "st" as a register.
10869 d -- print duplicated register operand for AVX instruction.
10870 D -- print condition for SSE cmp instruction.
10871 P -- if PIC, print an @PLT suffix.
10872 X -- don't print any sort of PIC '@' suffix for a symbol.
10873 & -- print some in-use local-dynamic symbol name.
10874 H -- print a memory address offset by 8; used for sse high-parts
10875 Y -- print condition for SSE5 com* instruction.
10876 + -- print a branch hint as 'cs' or 'ds' prefix
10877 ; -- print a semicolon (after prefixes due to bug in older gas).
/* PRINT_OPERAND worker: emit operand X to FILE, interpreting the
   operand-modifier CODE per the legend above.
   NOTE(review): excerpted listing -- the dispatching switch's case
   labels and many closing braces are elided between numbered lines.  */
10881 print_operand (FILE *file, rtx x, int code)
10888 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': emit an in-use local-dynamic TLS symbol name.  */
10893 assemble_name (file, get_some_local_dynamic_name ());
10897 switch (ASSEMBLER_DIALECT)
10904 /* Intel syntax. For absolute addresses, registers should not
10905 be surrounded by braces. */
10909 PRINT_OPERAND (file, x, 0);
10916 gcc_unreachable ();
10919 PRINT_OPERAND (file, x, 0);
/* Explicit size-suffix codes (L/W/B/Q/S/T) only matter for AT&T
   syntax; Intel syntax encodes size in the operand.  */
10924 if (ASSEMBLER_DIALECT == ASM_ATT)
10929 if (ASSEMBLER_DIALECT == ASM_ATT)
10934 if (ASSEMBLER_DIALECT == ASM_ATT)
10939 if (ASSEMBLER_DIALECT == ASM_ATT)
10944 if (ASSEMBLER_DIALECT == ASM_ATT)
10949 if (ASSEMBLER_DIALECT == ASM_ATT)
10954 gcc_assert (MEM_P (x));
10956 /* fild/fist don't get size suffixes if using Intel opcodes. */
10957 if (ASSEMBLER_DIALECT == ASM_INTEL)
10960 switch (GET_MODE_SIZE (GET_MODE (x)))
10963 #ifdef HAVE_AS_IX86_FILDS
10973 #ifdef HAVE_AS_IX86_FILDQ
10976 fputs ("ll", file);
10981 gcc_unreachable ();
10985 /* 387 opcodes don't get size suffixes if the operands are
10987 if (STACK_REG_P (x))
10990 /* Likewise if using Intel opcodes. */
10991 if (ASSEMBLER_DIALECT == ASM_INTEL)
10994 /* This is the size of op from size of operand. */
10995 switch (GET_MODE_SIZE (GET_MODE (x)))
11002 /* ??? This fails for HImode integer
11003 operator with memory operand. */
11006 #ifdef HAVE_AS_IX86_FILDS
11016 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11023 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11035 gcc_unreachable ();
/* 's': shift-double count; SHIFT_DOUBLE_OMITS_COUNT suppresses an
   implicit %cl count on some assemblers.  */
11052 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11054 PRINT_OPERAND (file, x, 0);
11055 fputs (", ", file);
11060 /* Little bit of braindamage here. The SSE compare instructions
11061 does use completely different names for the comparisons that the
11062 fp conditional moves. */
11065 switch (GET_CODE (x))
11068 fputs ("eq", file);
11071 fputs ("eq_us", file);
11074 fputs ("lt", file);
11077 fputs ("nge", file);
11080 fputs ("le", file);
11083 fputs ("ngt", file);
11086 fputs ("unord", file);
11089 fputs ("neq", file);
11092 fputs ("neq_oq", file);
11095 fputs ("ge", file);
11098 fputs ("nlt", file);
11101 fputs ("gt", file);
11104 fputs ("nle", file);
11107 fputs ("ord", file);
11110 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* Second 'D'-style table (non-AVX cmp spellings).  */
11116 switch (GET_CODE (x))
11120 fputs ("eq", file);
11124 fputs ("lt", file);
11128 fputs ("le", file);
11131 fputs ("unord", file);
11135 fputs ("neq", file);
11139 fputs ("nlt", file);
11143 fputs ("nle", file);
11146 fputs ("ord", file);
11149 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun-assembler cmov size suffix ("w.", "l.", "q.").  */
11155 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11156 if (ASSEMBLER_DIALECT == ASM_ATT)
11158 switch (GET_MODE (x))
11160 case HImode: putc ('w', file); break;
11162 case SFmode: putc ('l', file); break;
11164 case DFmode: putc ('q', file); break;
11165 default: gcc_unreachable ();
/* 'C': set/cmov condition suffix.  */
11172 if (!COMPARISON_P (x))
11174 output_operand_lossage ("operand is neither a constant nor a "
11175 "condition code, invalid operand code "
11179 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F': like 'C' but the x87 fcmov spelling.  */
11182 if (!COMPARISON_P (x))
11184 output_operand_lossage ("operand is neither a constant nor a "
11185 "condition code, invalid operand code "
11189 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11190 if (ASSEMBLER_DIALECT == ASM_ATT)
11193 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11196 /* Like above, but reverse condition */
11198 /* Check to see if argument to %c is really a constant
11199 and not a condition code which needs to be reversed. */
11200 if (!COMPARISON_P (x))
11202 output_operand_lossage ("operand is neither a constant nor a "
11203 "condition code, invalid operand "
11207 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11210 if (!COMPARISON_P (x))
11212 output_operand_lossage ("operand is neither a constant nor a "
11213 "condition code, invalid operand "
11217 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11218 if (ASSEMBLER_DIALECT == ASM_ATT)
11221 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused conditions, always CCmode.  */
11225 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11229 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the high 8 bytes of the operand (SSE high parts).  */
11233 /* It doesn't actually matter what mode we use here, as we're
11234 only going to use this for printing. */
11235 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction prefix, only when hints are profitable.  */
11243 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11246 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11249 int pred_val = INTVAL (XEXP (x, 0));
11251 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11252 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11254 int taken = pred_val > REG_BR_PROB_BASE / 2;
11255 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11257 /* Emit hints only in the case default branch prediction
11258 heuristics would fail. */
11259 if (taken != cputaken)
11261 /* We use 3e (DS) prefix for taken branches and
11262 2e (CS) prefix for not taken branches. */
11264 fputs ("ds ; ", file);
11266 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
11274 switch (GET_CODE (x))
11277 fputs ("neq", file);
11280 fputs ("eq", file);
11284 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11288 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11292 fputs ("le", file);
11296 fputs ("lt", file);
11299 fputs ("unord", file);
11302 fputs ("ord", file);
11305 fputs ("ueq", file);
11308 fputs ("nlt", file);
11311 fputs ("nle", file);
11314 fputs ("ule", file);
11317 fputs ("ult", file);
11320 fputs ("une", file);
11323 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator after prefixes, works around an old-gas bug.  */
11330 fputs (" ; ", file);
11337 output_operand_lossage ("invalid operand code '%c'", code);
/* No modifier (or fall-through): print the operand by its class.  */
11342 print_reg (x, code, file);
11344 else if (MEM_P (x))
11346 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11347 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11348 && GET_MODE (x) != BLKmode)
11351 switch (GET_MODE_SIZE (GET_MODE (x)))
11353 case 1: size = "BYTE"; break;
11354 case 2: size = "WORD"; break;
11355 case 4: size = "DWORD"; break;
11356 case 8: size = "QWORD"; break;
11357 case 12: size = "XWORD"; break;
11359 if (GET_MODE (x) == XFmode)
11365 gcc_unreachable ();
11368 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11371 else if (code == 'w')
11373 else if (code == 'k')
11376 fputs (size, file);
11377 fputs (" PTR ", file);
11381 /* Avoid (%rip) for call operands. */
11382 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11383 && !CONST_INT_P (x))
11384 output_addr_const (file, x);
11385 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11386 output_operand_lossage ("invalid constraints for operand");
11388 output_address (x);
/* SFmode immediates are emitted as their 32-bit target image.  */
11391 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11397 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11399 if (ASSEMBLER_DIALECT == ASM_ATT)
11401 fprintf (file, "0x%08lx", (long unsigned int) l);
11404 /* These float cases don't actually occur as immediate operands. */
11405 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11409 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11410 fprintf (file, "%s", dstr);
11413 else if (GET_CODE (x) == CONST_DOUBLE
11414 && GET_MODE (x) == XFmode)
11418 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11419 fprintf (file, "%s", dstr);
11424 /* We have patterns that allow zero sets of memory, for instance.
11425 In 64-bit mode, we should probably support all 8-byte vectors,
11426 since we can in fact encode that into an immediate. */
11427 if (GET_CODE (x) == CONST_VECTOR)
11429 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Plain constants: AT&T wants a '$' before immediates, Intel wants
   "OFFSET FLAT:" before symbolic addresses.  */
11435 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11437 if (ASSEMBLER_DIALECT == ASM_ATT)
11440 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11441 || GET_CODE (x) == LABEL_REF)
11443 if (ASSEMBLER_DIALECT == ASM_ATT)
11446 fputs ("OFFSET FLAT:", file);
11449 if (CONST_INT_P (x))
11450 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11452 output_pic_addr_const (file, x, code);
11454 output_addr_const (file, x);
11458 /* Print a memory operand whose address is ADDR. */
/* PRINT_OPERAND_ADDRESS worker: decompose ADDR into base + index*scale
   + disp (+ segment) and print it in AT&T or Intel syntax.
   NOTE(review): excerpted listing -- some statements (e.g. the "ok"
   assertion and dialect branch bodies) are elided.  */
11461 print_operand_address (FILE *file, rtx addr)
11463 struct ix86_address parts;
11464 rtx base, index, disp;
11466 int ok = ix86_decompose_address (addr, &parts);
11471 index = parts.index;
11473 scale = parts.scale;
/* Non-default segment: emit an fs:/gs: override.  */
11481 if (ASSEMBLER_DIALECT == ASM_ATT)
11483 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11486 gcc_unreachable ();
11489 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11490 if (TARGET_64BIT && !base && !index)
/* Strip a CONST (PLUS symbol const_int) wrapper to find the symbol;
   only non-TLS symbols/labels qualify for RIP-relative form.  */
11494 if (GET_CODE (disp) == CONST
11495 && GET_CODE (XEXP (disp, 0)) == PLUS
11496 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11497 symbol = XEXP (XEXP (disp, 0), 0);
11499 if (GET_CODE (symbol) == LABEL_REF
11500 || (GET_CODE (symbol) == SYMBOL_REF
11501 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11504 if (!base && !index)
11506 /* Displacement only requires special attention. */
11508 if (CONST_INT_P (disp))
11510 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11511 fputs ("ds:", file);
11512 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11515 output_pic_addr_const (file, disp, 0);
11517 output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
11521 if (ASSEMBLER_DIALECT == ASM_ATT)
11526 output_pic_addr_const (file, disp, 0);
11527 else if (GET_CODE (disp) == LABEL_REF)
11528 output_asm_label (disp);
11530 output_addr_const (file, disp);
11535 print_reg (base, 0, file);
11539 print_reg (index, 0, file);
11541 fprintf (file, ",%d", scale);
/* Intel syntax: [base+index*scale+disp].  */
11547 rtx offset = NULL_RTX;
11551 /* Pull out the offset of a symbol; print any symbol itself. */
11552 if (GET_CODE (disp) == CONST
11553 && GET_CODE (XEXP (disp, 0)) == PLUS
11554 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11556 offset = XEXP (XEXP (disp, 0), 1);
11557 disp = gen_rtx_CONST (VOIDmode,
11558 XEXP (XEXP (disp, 0), 0));
11562 output_pic_addr_const (file, disp, 0);
11563 else if (GET_CODE (disp) == LABEL_REF)
11564 output_asm_label (disp);
11565 else if (CONST_INT_P (disp))
11568 output_addr_const (file, disp);
11574 print_reg (base, 0, file);
/* Positive offsets get an explicit '+'; negative ones already carry
   their sign (NOTE(review): the '+'/'-' fputs lines are elided).  */
11577 if (INTVAL (offset) >= 0)
11579 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11583 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11590 print_reg (index, 0, file);
11592 fprintf (file, "*%d", scale);
/* OUTPUT_ADDR_CONST_EXTRA hook: print target-specific UNSPEC address
   constants (TLS relocations, Macho PIC offsets).  Returns false for
   anything that is not a recognized UNSPEC so generic code can try.  */
11600 output_addr_const_extra (FILE *file, rtx x)
11604 if (GET_CODE (x) != UNSPEC)
11607 op = XVECEXP (x, 0, 0);
11608 switch (XINT (x, 1))
11610 case UNSPEC_GOTTPOFF:
11611 output_addr_const (file, op);
11612 /* FIXME: This might be @TPOFF in Sun ld. */
11613 fputs ("@GOTTPOFF", file);
11616 output_addr_const (file, op);
11617 fputs ("@TPOFF", file);
11619 case UNSPEC_NTPOFF:
11620 output_addr_const (file, op);
/* @TPOFF for 64-bit, @NTPOFF for 32-bit (the TARGET_64BIT test is
   elided in this excerpt).  */
11622 fputs ("@TPOFF", file);
11624 fputs ("@NTPOFF", file);
11626 case UNSPEC_DTPOFF:
11627 output_addr_const (file, op);
11628 fputs ("@DTPOFF", file);
11630 case UNSPEC_GOTNTPOFF:
11631 output_addr_const (file, op);
11633 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11634 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11636 fputs ("@GOTNTPOFF", file);
11638 case UNSPEC_INDNTPOFF:
11639 output_addr_const (file, op);
11640 fputs ("@INDNTPOFF", file);
11643 case UNSPEC_MACHOPIC_OFFSET:
11644 output_addr_const (file, op);
11646 machopic_output_function_base_name (file);
11657 /* Split one or more DImode RTL references into pairs of SImode
11658 references. The RTL can be REG, offsettable MEM, integer constant, or
11659 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11660 split and "num" is its length. lo_half and hi_half are output arrays
11661 that parallel "operands". */
11664 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11668 rtx op = operands[num];
11670 /* simplify_subreg refuse to split volatile memory addresses,
11671 but we still have to handle it. */
/* Volatile MEMs: split by adjusting the address directly.  */
11674 lo_half[num] = adjust_address (op, SImode, 0);
11675 hi_half[num] = adjust_address (op, SImode, 4);
/* Everything else: take SImode subregs at byte offsets 0 and 4;
   mode-less constants are treated as DImode.  */
11679 lo_half[num] = simplify_gen_subreg (SImode, op,
11680 GET_MODE (op) == VOIDmode
11681 ? DImode : GET_MODE (op), 0);
11682 hi_half[num] = simplify_gen_subreg (SImode, op,
11683 GET_MODE (op) == VOIDmode
11684 ? DImode : GET_MODE (op), 4);
11688 /* Split one or more TImode RTL references into pairs of DImode
11689 references. The RTL can be REG, offsettable MEM, integer constant, or
11690 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11691 split and "num" is its length. lo_half and hi_half are output arrays
11692 that parallel "operands". */
11695 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11699 rtx op = operands[num];
11701 /* simplify_subreg refuse to split volatile memory addresses, but we
11702 still have to handle it. */
/* Same scheme as split_di, but DImode halves at offsets 0 and 8.  */
11705 lo_half[num] = adjust_address (op, DImode, 0);
11706 hi_half[num] = adjust_address (op, DImode, 8);
11710 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11711 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11716 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11717 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11718 is the expression of the binary operation. The output may either be
11719 emitted here, or returned to the caller, like all output_* functions.
11721 There is no guarantee that the operands are the same mode, as they
11722 might be within FLOAT or FLOAT_EXTEND expressions. */
11724 #ifndef SYSV386_COMPAT
11725 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11726 wants to fix the assemblers because that causes incompatibility
11727 with gcc. No-one wants to fix gcc because that causes
11728 incompatibility with assemblers... You can use the option of
11729 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11730 #define SYSV386_COMPAT 1
/* NOTE(review): excerpted listing -- the #endif, opcode-string setup
   and several branch bodies are elided between numbered lines.  */
11734 output_387_binary_op (rtx insn, rtx *operands)
11736 static char buf[40];
11739 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11741 #ifdef ENABLE_CHECKING
11742 /* Even if we do not want to check the inputs, this documents input
11743 constraints. Which helps in understanding the following code. */
11744 if (STACK_REG_P (operands[0])
11745 && ((REG_P (operands[1])
11746 && REGNO (operands[0]) == REGNO (operands[1])
11747 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11748 || (REG_P (operands[2])
11749 && REGNO (operands[0]) == REGNO (operands[2])
11750 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11751 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11754 gcc_assert (is_sse);
/* Select base mnemonic (and its SSE counterpart) by operation;
   mixed-mode integer operands select the fi* variants.  */
11757 switch (GET_CODE (operands[3]))
11760 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11761 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11769 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11770 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11778 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11779 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11787 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11788 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11796 gcc_unreachable ();
/* SSE path: three-operand AVX form or classic two-operand form,
   with ss/sd suffix by mode.  */
11803 strcpy (buf, ssep);
11804 if (GET_MODE (operands[0]) == SFmode)
11805 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11807 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11811 strcpy (buf, ssep + 1);
11812 if (GET_MODE (operands[0]) == SFmode)
11813 strcat (buf, "ss\t{%2, %0|%0, %2}");
11815 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose operand-order/pop suffix by which operand is at
   the top of the FP stack and which dies.  */
11821 switch (GET_CODE (operands[3]))
11825 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative ops: canonicalize so operands[0] == operands[1].  */
11827 rtx temp = operands[2];
11828 operands[2] = operands[1];
11829 operands[1] = temp;
11832 /* know operands[0] == operands[1]. */
11834 if (MEM_P (operands[2]))
11840 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11842 if (STACK_TOP_P (operands[0]))
11843 /* How is it that we are storing to a dead operand[2]?
11844 Well, presumably operands[1] is dead too. We can't
11845 store the result to st(0) as st(0) gets popped on this
11846 instruction. Instead store to operands[2] (which I
11847 think has to be st(1)). st(1) will be popped later.
11848 gcc <= 2.8.1 didn't have this check and generated
11849 assembly code that the Unixware assembler rejected. */
11850 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11852 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11856 if (STACK_TOP_P (operands[0]))
11857 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11859 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative MINUS/DIV: operand order matters.  */
11864 if (MEM_P (operands[1]))
11870 if (MEM_P (operands[2]))
11876 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11879 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11880 derived assemblers, confusingly reverse the direction of
11881 the operation for fsub{r} and fdiv{r} when the
11882 destination register is not st(0). The Intel assembler
11883 doesn't have this brain damage. Read !SYSV386_COMPAT to
11884 figure out what the hardware really does. */
11885 if (STACK_TOP_P (operands[0]))
11886 p = "{p\t%0, %2|rp\t%2, %0}";
11888 p = "{rp\t%2, %0|p\t%0, %2}";
11890 if (STACK_TOP_P (operands[0]))
11891 /* As above for fmul/fadd, we can't store to st(0). */
11892 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11894 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11899 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11902 if (STACK_TOP_P (operands[0]))
11903 p = "{rp\t%0, %1|p\t%1, %0}";
11905 p = "{p\t%1, %0|rp\t%0, %1}";
11907 if (STACK_TOP_P (operands[0]))
11908 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11910 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11915 if (STACK_TOP_P (operands[0]))
11917 if (STACK_TOP_P (operands[1]))
11918 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11920 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11923 else if (STACK_TOP_P (operands[1]))
11926 p = "{\t%1, %0|r\t%0, %1}";
11928 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11934 p = "{r\t%2, %0|\t%0, %2}";
11936 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11942 gcc_unreachable ();
11949 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Return the x87 control-word mode (I387_CW_*) that INSN requires,
   for the mode-switching optimization.  */
11952 ix86_mode_needed (int entity, rtx insn)
11954 enum attr_i387_cw mode;
11956 /* The mode UNINITIALIZED is used to store control word after a
11957 function call or ASM pattern. The mode ANY specify that function
11958 has no requirements on the control word and make no changes in the
11959 bits we are interested in. */
11962 || (NONJUMP_INSN_P (insn)
11963 && (asm_noperands (PATTERN (insn)) >= 0
11964 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
11965 return I387_CW_UNINITIALIZED;
11967 if (recog_memoized (insn) < 0)
11968 return I387_CW_ANY;
11970 mode = get_attr_i387_cw (insn);
/* Map the insn attribute through per-mode flags (NOTE(review): the
   returned values inside these branches are elided in this excerpt).  */
11975 if (mode == I387_CW_TRUNC)
11980 if (mode == I387_CW_FLOOR)
11985 if (mode == I387_CW_CEIL)
11990 if (mode == I387_CW_MASK_PM)
11995 gcc_unreachable ();
11998 return I387_CW_ANY;
12001 /* Output code to initialize control word copies used by trunc?f?i and
12002 rounding patterns. CURRENT_MODE is set to current control word,
12003 while NEW_MODE is set to new control word. */
12006 emit_i387_cw_initialization (int mode)
12008 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12011 enum ix86_stack_slot slot;
12013 rtx reg = gen_reg_rtx (HImode);
/* Save the current control word with fnstcw and load it into REG.  */
12015 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12016 emit_move_insn (reg, copy_rtx (stored_mode));
/* Slow-partial-reg / size-optimized path: full 16-bit and/or ops.  */
12018 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12019 || optimize_function_for_size_p (cfun))
12023 case I387_CW_TRUNC:
12024 /* round toward zero (truncate) */
12025 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12026 slot = SLOT_CW_TRUNC;
12029 case I387_CW_FLOOR:
12030 /* round down toward -oo */
12031 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12032 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12033 slot = SLOT_CW_FLOOR;
12037 /* round up toward +oo */
12038 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12039 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12040 slot = SLOT_CW_CEIL;
12043 case I387_CW_MASK_PM:
12044 /* mask precision exception for nearbyint() */
12045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12046 slot = SLOT_CW_MASK_PM;
12050 gcc_unreachable ();
/* Fast path: insert the 2 rounding-control bits with movsi_insv_1.  */
12057 case I387_CW_TRUNC:
12058 /* round toward zero (truncate) */
12059 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12060 slot = SLOT_CW_TRUNC;
12063 case I387_CW_FLOOR:
12064 /* round down toward -oo */
12065 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12066 slot = SLOT_CW_FLOOR;
12070 /* round up toward +oo */
12071 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12072 slot = SLOT_CW_CEIL;
12075 case I387_CW_MASK_PM:
12076 /* mask precision exception for nearbyint() */
12077 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12078 slot = SLOT_CW_MASK_PM;
12082 gcc_unreachable ();
/* Store the modified control word to its dedicated stack slot so
   fldcw can reload it at use sites.  */
12086 gcc_assert (slot < MAX_386_STACK_LOCALS);
12088 new_mode = assign_386_stack_local (HImode, slot);
12089 emit_move_insn (new_mode, reg);
12092 /* Output code for INSN to convert a float to a signed int. OPERANDS
12093 are the insn operands. The output may be [HSD]Imode and the input
12094 operand may be [SDX]Fmode. */
12097 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12099 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12100 int dimode_p = GET_MODE (operands[0]) == DImode;
12101 int round_mode = get_attr_i387_cw (insn);
12103 /* Jump through a hoop or two for DImode, since the hardware has no
12104 non-popping instruction. We used to do this a different way, but
12105 that was somewhat fragile and broke with post-reload splitters. */
12106 if ((dimode_p || fisttp) && !stack_top_dies)
12107 output_asm_insn ("fld\t%y1", operands);
12109 gcc_assert (STACK_TOP_P (operands[1]));
12110 gcc_assert (MEM_P (operands[0]));
12111 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control word; otherwise
   bracket the store with fldcw to switch rounding mode.  */
12114 output_asm_insn ("fisttp%Z0\t%0", operands);
12117 if (round_mode != I387_CW_ANY)
12118 output_asm_insn ("fldcw\t%3", operands);
12119 if (stack_top_dies || dimode_p)
12120 output_asm_insn ("fistp%Z0\t%0", operands);
12122 output_asm_insn ("fist%Z0\t%0", operands);
12123 if (round_mode != I387_CW_ANY)
12124 output_asm_insn ("fldcw\t%2", operands);
12130 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12131 have the values zero or one, indicates the ffreep insn's operand
12132 from the OPERANDS array. */
12134 static const char *
12135 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12137 if (TARGET_USE_FFREEP)
12138 #if HAVE_AS_IX86_FFREEP
12139 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: emit its raw encoding (0xdf 0xc0+i) via
   .word, patching the register digit into the template in place.  */
12142 static char retval[] = ".word\t0xc_df";
12143 int regno = REGNO (operands[opno]);
12145 gcc_assert (FP_REGNO_P (regno));
12147 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not profitable: plain fstp.  */
12152 return opno ? "fstp\t%y1" : "fstp\t%y0";
12156 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12157 should be used. UNORDERED_P is true when fucom should be used. */
12160 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12162 int stack_top_dies;
12163 rtx cmp_op0, cmp_op1;
12164 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Pick the comparison operands; the eflags form shifts them by one
   (operands[0] is the flags destination).  */
12168 cmp_op0 = operands[0];
12169 cmp_op1 = operands[1];
12173 cmp_op0 = operands[1];
12174 cmp_op1 = operands[2];
/* SSE scalar compares: [v]ucomiss/[v]ucomisd or [v]comiss/[v]comisd;
   the +1 skips the leading 'v' when AVX is unavailable.  */
12179 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12180 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12181 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12182 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12184 if (GET_MODE (operands[0]) == SFmode)
12186 return &ucomiss[TARGET_AVX ? 0 : 1];
12188 return &comiss[TARGET_AVX ? 0 : 1];
12191 return &ucomisd[TARGET_AVX ? 0 : 1];
12193 return &comisd[TARGET_AVX ? 0 : 1];
12196 gcc_assert (STACK_TOP_P (cmp_op0));
12198 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, freeing st(0) afterwards if it dies.  */
12200 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12202 if (stack_top_dies)
12204 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12205 return output_387_ffreep (operands, 1);
12208 return "ftst\n\tfnstsw\t%0";
12211 if (STACK_REG_P (cmp_op1)
12213 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12214 && REGNO (cmp_op1) != FIRST_STACK_REG)
12216 /* If both the top of the 387 stack dies, and the other operand
12217 is also a stack register that dies, then this must be a
12218 `fcompp' float compare */
12222 /* There is no double popping fcomi variant. Fortunately,
12223 eflags is immune from the fstp's cc clobbering. */
12225 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12227 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12228 return output_387_ffreep (operands, 0);
12233 return "fucompp\n\tfnstsw\t%0";
12235 return "fcompp\n\tfnstsw\t%0";
12240 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12242 static const char * const alt[16] =
12244 "fcom%z2\t%y2\n\tfnstsw\t%0",
12245 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12246 "fucom%z2\t%y2\n\tfnstsw\t%0",
12247 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12249 "ficom%z2\t%y2\n\tfnstsw\t%0",
12250 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12254 "fcomi\t{%y1, %0|%0, %y1}",
12255 "fcomip\t{%y1, %0|%0, %y1}",
12256 "fucomi\t{%y1, %0|%0, %y1}",
12257 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT from the flag bits above.  */
12268 mask = eflags_p << 3;
12269 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12270 mask |= unordered_p << 1;
12271 mask |= stack_top_dies;
12273 gcc_assert (mask < 16);
/* Emit one element of an absolute jump-table: ".long L<value>"
   (or .quad in 64-bit mode).  */
12282 ix86_output_addr_vec_elt (FILE *file, int value)
12284 const char *directive = ASM_LONG;
12288 directive = ASM_QUAD;
12290 gcc_assert (!TARGET_64BIT);
12293 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC (relative) jump table: a label difference,
   @GOTOFF reference, or GOT-relative expression depending on target.  */
12297 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12299 const char *directive = ASM_LONG;
12302 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12303 directive = ASM_QUAD;
12305 gcc_assert (!TARGET_64BIT);
12307 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12308 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12309 fprintf (file, "%s%s%d-%s%d\n",
12310 directive, LPREFIX, value, LPREFIX, rel);
12311 else if (HAVE_AS_GOTOFF_IN_DATA)
12312 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12314 else if (TARGET_MACHO)
12316 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12317 machopic_output_function_base_name (file);
12318 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
12322 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12323 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12326 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Emit insns to zero DEST.  Post-reload only: widens sub-SI registers
   to SImode and lets the xor form clobber the flags.  */
12330 ix86_expand_clear (rtx dest)
12334 /* We play register width games, which are only valid after reload. */
12335 gcc_assert (reload_completed);
12337 /* Avoid HImode and its attendant prefix byte. */
12338 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12339 dest = gen_rtx_REG (SImode, REGNO (dest));
12340 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12342 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12343 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12345 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12346 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12352 /* X is an unchanging MEM.  If it is a constant pool reference, return
12353 the constant pool rtx, else NULL.  */
12356 maybe_get_pool_constant (rtx x)
/* Strip PIC/GOT wrapping from the address before testing it.  */
12358 x = ix86_delegitimize_address (XEXP (x, 0));
12360 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12361 return get_pool_constant (x);
/* Expand a scalar move OPERANDS[0] := OPERANDS[1] in MODE, legitimizing
   TLS, DLLIMPORT and PIC references and forcing awkward constants into
   registers or memory as required before emitting the SET.
   NOTE(review): elided extract -- OP0/OP1/TMP declarations, several
   braces and intermediate conditions are missing from this view.  */
12367 ix86_expand_move (enum machine_mode mode, rtx operands[])
12370 enum tls_model model;
12375 if (GET_CODE (op1) == SYMBOL_REF)
12377 model = SYMBOL_REF_TLS_MODEL (op1);
/* Thread-local symbol: rewrite to the appropriate TLS access sequence.  */
12380 op1 = legitimize_tls_address (op1, model, true);
12381 op1 = force_operand (op1, op0);
12385 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12386 && SYMBOL_REF_DLLIMPORT_P (op1))
12387 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus SYMBOL addend)) -- handle the symbol, then re-add.  */
12389 else if (GET_CODE (op1) == CONST
12390 && GET_CODE (XEXP (op1, 0)) == PLUS
12391 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12393 rtx addend = XEXP (XEXP (op1, 0), 1);
12394 rtx symbol = XEXP (XEXP (op1, 0), 0);
12397 model = SYMBOL_REF_TLS_MODEL (symbol);
12399 tmp = legitimize_tls_address (symbol, model, true);
12400 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12401 && SYMBOL_REF_DLLIMPORT_P (symbol))
12402 tmp = legitimize_dllimport_symbol (symbol, true);
12406 tmp = force_operand (tmp, NULL);
12407 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12408 op0, 1, OPTAB_DIRECT)
/* PIC: symbolic operands must go through PIC legitimization.  */
12414 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12416 if (TARGET_MACHO && !TARGET_64BIT)
/* During reload no new pseudos may be created; reuse OP0 if possible.  */
12421 rtx temp = ((reload_in_progress
12422 || ((op0 && REG_P (op0))
12424 ? op0 : gen_reg_rtx (Pmode));
12425 op1 = machopic_indirect_data_reference (op1, temp);
12426 op1 = machopic_legitimize_pic_address (op1, mode,
12427 temp == op1 ? 0 : temp)
12429 else if (MACHOPIC_INDIRECT)
12430 op1 = machopic_indirect_data_reference (op1, 0);
12438 op1 = force_reg (Pmode, op1);
12439 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12441 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12442 op1 = legitimize_pic_address (op1, reg);
/* Non-push destinations cannot take arbitrary operands directly.  */
12451 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12452 || !push_operand (op0, mode))
12454 op1 = force_reg (mode, op1);
12456 if (push_operand (op0, mode)
12457 && ! general_no_elim_operand (op1, mode))
12458 op1 = copy_to_mode_reg (mode, op1);
12460 /* Force large constants in 64bit compilation into register
12461 to get them CSEed.  */
12462 if (can_create_pseudo_p ()
12463 && (mode == DImode) && TARGET_64BIT
12464 && immediate_operand (op1, mode)
12465 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12466 && !register_operand (op0, mode)
12468 op1 = copy_to_mode_reg (mode, op1);
12470 if (can_create_pseudo_p ()
12471 && FLOAT_MODE_P (mode)
12472 && GET_CODE (op1) == CONST_DOUBLE)
12474 /* If we are loading a floating point constant to a register,
12475 force the value to memory now, since we'll get better code
12476 out the back end.  */
12478 op1 = validize_mem (force_const_mem (mode, op1));
12479 if (!register_operand (op0, mode))
/* mem := mem is not a valid insn; bounce through a register.  */
12481 rtx temp = gen_reg_rtx (mode);
12482 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12483 emit_move_insn (op0, temp);
12489 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move OPERANDS[0] := OPERANDS[1] in MODE, forcing
   non-zero constants to memory and routing under-aligned SSE operands
   through ix86_expand_vector_move_misalign.
   NOTE(review): elided extract -- some braces, the TMP declaration and
   an early return are missing from this view.  */
12493 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12495 rtx op0 = operands[0], op1 = operands[1];
12496 unsigned int align = GET_MODE_ALIGNMENT (mode);
12498 /* Force constants other than zero into memory.  We do not know how
12499 the instructions used to build constants modify the upper 64 bits
12500 of the register, once we have that information we may be able
12501 to handle some of them more efficiently.  */
12502 if (can_create_pseudo_p ()
12503 && register_operand (op0, mode)
12504 && (CONSTANT_P (op1)
12505 || (GET_CODE (op1) == SUBREG
12506 && CONSTANT_P (SUBREG_REG (op1))))
12507 && standard_sse_constant_p (op1) <= 0)
12508 op1 = validize_mem (force_const_mem (mode, op1));
12510 /* We need to check memory alignment for SSE mode since attribute
12511 can make operands unaligned.  */
12512 if (can_create_pseudo_p ()
12513 && SSE_REG_MODE_P (mode)
12514 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12515 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12519 /* ix86_expand_vector_move_misalign() does not like constants ... */
12520 if (CONSTANT_P (op1)
12521 || (GET_CODE (op1) == SUBREG
12522 && CONSTANT_P (SUBREG_REG (op1))))
12523 op1 = validize_mem (force_const_mem (mode, op1));
12525 /* ... nor both arguments in memory.  */
12526 if (!register_operand (op0, mode)
12527 && !register_operand (op1, mode))
12528 op1 = force_reg (mode, op1);
12530 tmp[0] = op0; tmp[1] = op1;
12531 ix86_expand_vector_move_misalign (mode, tmp);
12535 /* Make operand1 a register if it isn't already.  */
12536 if (can_create_pseudo_p ()
12537 && !register_operand (op0, mode)
12538 && !register_operand (op1, mode))
12540 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12544 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12547 /* Implement the movmisalign patterns for SSE.  Non-SSE modes go
12548 straight to ix86_expand_vector_move.  */
12549 /* Code generation for scalar reg-reg moves of single and double precision data:
12550 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12554 if (x86_sse_partial_reg_dependency == true)
12559 Code generation for scalar loads of double precision data:
12560 if (x86_sse_split_regs == true)
12561 movlpd mem, reg      (gas syntax)
12565 Code generation for unaligned packed loads of single precision data
12566 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12567 if (x86_sse_unaligned_move_optimal)
12570 if (x86_sse_partial_reg_dependency == true)
12582 Code generation for unaligned packed loads of double precision data
12583 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12584 if (x86_sse_unaligned_move_optimal)
12587 if (x86_sse_split_regs == true)
/* NOTE(review): elided extract -- OP0/OP1/M/ZERO declarations, the AVX
   dispatch guard and several braces/returns are missing from this view.  */
12600 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
/* AVX path (presumably guarded by TARGET_AVX above -- elided): pick the
   unaligned move insn by mode class and vector width.  */
12609 switch (GET_MODE_CLASS (mode))
12611 case MODE_VECTOR_INT:
12613 switch (GET_MODE_SIZE (mode))
/* 16-byte integer vectors: vmovdqu.  */
12616 op0 = gen_lowpart (V16QImode, op0);
12617 op1 = gen_lowpart (V16QImode, op1);
12618 emit_insn (gen_avx_movdqu (op0, op1));
/* 32-byte integer vectors: 256-bit vmovdqu.  */
12621 op0 = gen_lowpart (V32QImode, op0);
12622 op1 = gen_lowpart (V32QImode, op1);
12623 emit_insn (gen_avx_movdqu256 (op0, op1));
12626 gcc_unreachable ();
12629 case MODE_VECTOR_FLOAT:
12630 op0 = gen_lowpart (mode, op0);
12631 op1 = gen_lowpart (mode, op1);
12636 emit_insn (gen_avx_movups (op0, op1));
12639 emit_insn (gen_avx_movups256 (op0, op1));
12642 emit_insn (gen_avx_movupd (op0, op1));
12645 emit_insn (gen_avx_movupd256 (op0, op1));
12648 gcc_unreachable ();
12653 gcc_unreachable ();
/* Non-AVX SSE path.  Load side (MEM_P (op1) -- guard elided):  */
12661 /* If we're optimizing for size, movups is the smallest.  */
12662 if (optimize_insn_for_size_p ())
12664 op0 = gen_lowpart (V4SFmode, op0);
12665 op1 = gen_lowpart (V4SFmode, op1);
12666 emit_insn (gen_sse_movups (op0, op1));
12670 /* ??? If we have typed data, then it would appear that using
12671 movdqu is the only way to get unaligned data loaded with
12673 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12675 op0 = gen_lowpart (V16QImode, op0);
12676 op1 = gen_lowpart (V16QImode, op1);
12677 emit_insn (gen_sse2_movdqu (op0, op1));
12681 if (TARGET_SSE2 && mode == V2DFmode)
12685 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12687 op0 = gen_lowpart (V2DFmode, op0);
12688 op1 = gen_lowpart (V2DFmode, op1);
12689 emit_insn (gen_sse2_movupd (op0, op1));
12693 /* When SSE registers are split into halves, we can avoid
12694 writing to the top half twice.  */
12695 if (TARGET_SSE_SPLIT_REGS)
12697 emit_clobber (op0);
12702 /* ??? Not sure about the best option for the Intel chips.
12703 The following would seem to satisfy; the register is
12704 entirely cleared, breaking the dependency chain.  We
12705 then store to the upper half, with a dependency depth
12706 of one.  A rumor has it that Intel recommends two movsd
12707 followed by an unpacklpd, but this is unconfirmed.  And
12708 given that the dependency depth of the unpacklpd would
12709 still be one, I'm not sure why this would be better.  */
12710 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately: low via loadlpd, high via loadhpd.  */
12713 m = adjust_address (op1, DFmode, 0);
12714 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12715 m = adjust_address (op1, DFmode, 8);
12716 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12720 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12722 op0 = gen_lowpart (V4SFmode, op0);
12723 op1 = gen_lowpart (V4SFmode, op1);
12724 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on the old register contents first.  */
12728 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12729 emit_move_insn (op0, CONST0_RTX (mode));
12731 emit_clobber (op0);
12733 if (mode != V4SFmode)
12734 op0 = gen_lowpart (V4SFmode, op0);
12735 m = adjust_address (op1, V2SFmode, 0);
12736 emit_insn (gen_sse_loadlps (op0, op0, m));
12737 m = adjust_address (op1, V2SFmode, 8);
12738 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Store side: unaligned MEM destination.  */
12741 else if (MEM_P (op0))
12743 /* If we're optimizing for size, movups is the smallest.  */
12744 if (optimize_insn_for_size_p ())
12746 op0 = gen_lowpart (V4SFmode, op0);
12747 op1 = gen_lowpart (V4SFmode, op1);
12748 emit_insn (gen_sse_movups (op0, op1));
12752 /* ??? Similar to above, only less clear because of quote
12753 typeless stores unquote.  */
12754 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12755 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12757 op0 = gen_lowpart (V16QImode, op0);
12758 op1 = gen_lowpart (V16QImode, op1);
12759 emit_insn (gen_sse2_movdqu (op0, op1));
12763 if (TARGET_SSE2 && mode == V2DFmode)
/* Store the two DF halves separately.  */
12765 m = adjust_address (op0, DFmode, 0);
12766 emit_insn (gen_sse2_storelpd (m, op1));
12767 m = adjust_address (op0, DFmode, 8);
12768 emit_insn (gen_sse2_storehpd (m, op1));
12772 if (mode != V4SFmode)
12773 op1 = gen_lowpart (V4SFmode, op1);
12774 m = adjust_address (op0, V2SFmode, 0);
12775 emit_insn (gen_sse_storelps (m, op1));
12776 m = adjust_address (op0, V2SFmode, 8);
12777 emit_insn (gen_sse_storehps (m, op1));
12781 gcc_unreachable ();
12784 /* Expand a push in MODE.  This is some mode for which we do not support
12785 proper push instructions, at least from the registers that we expect
12786 the value to live in.  */
12789 ix86_expand_push (enum machine_mode mode, rtx x)
/* Manually decrement the stack pointer by the operand size ...  */
12793 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12794 GEN_INT (-GET_MODE_SIZE (mode)),
12795 stack_pointer_rtx, 1, OPTAB_DIRECT)
12796 if (tmp != stack_pointer_rtx)
12797 emit_move_insn (stack_pointer_rtx, tmp);
/* ... then store the value at the new top of stack.  */
12799 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12801 /* When we push an operand onto stack, it has to be aligned at least
12802 at the function argument boundary.  However since we don't have
12803 the argument type, we can't determine the actual argument
12805 emit_move_insn (tmp, x);
12808 /* Helper function of ix86_fixup_binary_operands to canonicalize
12809 operand order.  Returns true if the operands should be swapped.  */
12812 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12815 rtx dst = operands[0];
12816 rtx src1 = operands[1];
12817 rtx src2 = operands[2];
12819 /* If the operation is not commutative, we can't do anything.  */
12820 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12823 /* Highest priority is that src1 should match dst.  */
12824 if (rtx_equal_p (dst, src1))
12826 if (rtx_equal_p (dst, src2))
12829 /* Next highest priority is that immediate constants come second.  */
12830 if (immediate_operand (src2, mode))
12832 if (immediate_operand (src1, mode))
12835 /* Lowest priority is that memory references should come second.  */
12845 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
12846 destination to use for the operation.  If different from the true
12847 destination in operands[0], a copy operation will be required.  */
12850 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12853 rtx dst = operands[0];
12854 rtx src1 = operands[1];
12855 rtx src2 = operands[2];
12857 /* Canonicalize operand order.  */
12858 if (ix86_swap_binary_operands_p (code, mode, operands))
12862 /* It is invalid to swap operands of different modes.  */
12863 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12870 /* Both source operands cannot be in memory.  */
12871 if (MEM_P (src1) && MEM_P (src2))
12873 /* Optimization: Only read from memory once.  */
12874 if (rtx_equal_p (src1, src2))
/* Identical MEMs: load once and use the register for both.  */
12876 src2 = force_reg (mode, src2);
12880 src2 = force_reg (mode, src2);
12883 /* If the destination is memory, and we do not have matching source
12884 operands, do things in registers.  */
12885 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12886 dst = gen_reg_rtx (mode);
12888 /* Source 1 cannot be a constant.  */
12889 if (CONSTANT_P (src1))
12890 src1 = force_reg (mode, src1);
12892 /* Source 1 cannot be a non-matching memory.  */
12893 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12894 src1 = force_reg (mode, src1);
12896 operands[1] = src1;
12897 operands[2] = src2;
12901 /* Similarly, but assume that the destination has already been
12902 set up properly.  */
12905 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12906 enum machine_mode mode, rtx operands[])
12908 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
/* Caller guarantees no destination copy is needed.  */
12909 gcc_assert (dst == operands[0]);
12912 /* Attempt to expand a binary operator.  Make the expansion closer to the
12913 actual machine, then just general_operand, which will allow 3 separate
12914 memory references (one output, two input) in a single insn.  */
12917 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12920 rtx src1, src2, dst, op, clob;
12922 dst = ix86_fixup_binary_operands (code, mode, operands);
12923 src1 = operands[1];
12924 src2 = operands[2];
12926 /* Emit the instruction.  */
12928 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12929 if (reload_in_progress)
12931 /* Reload doesn't know about the flags register, and doesn't know that
12932 it doesn't want to clobber it.  We can only do this with PLUS.  */
12933 gcc_assert (code == PLUS);
/* Normal case: emit (parallel [op; clobber FLAGS]) -- arithmetic on x86
   always clobbers the condition codes.  */
12938 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12939 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12942 /* Fix up the destination if needed.  */
12943 if (dst != operands[0])
12944 emit_move_insn (operands[0], dst);
12947 /* Return TRUE or FALSE depending on whether the binary operator meets the
12948 appropriate constraints.  */
12951 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
12954 rtx dst = operands[0];
12955 rtx src1 = operands[1];
12956 rtx src2 = operands[2];
12958 /* Both source operands cannot be in memory.  */
12959 if (MEM_P (src1) && MEM_P (src2))
12962 /* Canonicalize operand order for commutative operators.  */
12963 if (ix86_swap_binary_operands_p (code, mode, operands))
12970 /* If the destination is memory, we must have a matching source operand.  */
12971 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12974 /* Source 1 cannot be a constant.  */
12975 if (CONSTANT_P (src1))
12978 /* Source 1 cannot be a non-matching memory.  */
12979 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12985 /* Attempt to expand a unary operator.  Make the expansion closer to the
12986 actual machine, then just general_operand, which will allow 2 separate
12987 memory references (one output, one input) in a single insn.  */
12990 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
12993 int matching_memory;
12994 rtx src, dst, op, clob;
12999 /* If the destination is memory, and we do not have matching source
13000 operands, do things in registers.  */
13001 matching_memory = 0;
13004 if (rtx_equal_p (dst, src))
13005 matching_memory = 1;
/* Non-matching MEM destination: compute into a fresh register instead.  */
13007 dst = gen_reg_rtx (mode);
13010 /* When source operand is memory, destination must match.  */
13011 if (MEM_P (src) && !matching_memory)
13012 src = force_reg (mode, src);
13014 /* Emit the instruction.  */
13016 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13017 if (reload_in_progress || code == NOT)
13019 /* Reload doesn't know about the flags register, and doesn't know that
13020 it doesn't want to clobber it.  */
13021 gcc_assert (code == NOT);
/* Non-NOT unary ops clobber the flags; make the clobber explicit.  */
13026 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13027 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13030 /* Fix up the destination if needed.  */
13031 if (dst != operands[0])
13032 emit_move_insn (operands[0], dst);
13035 #define LEA_SEARCH_THRESHOLD 12
13037 /* Search backward for non-agu definition of register number REGNO1
13038 or register number REGNO2 in INSN's basic block until
13039 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13040 2. Reach BB boundary, or
13041 3. Reach agu definition.
13042 Returns the distance between the non-agu definition point and INSN.
13043 If no definition point, returns -1.
   NOTE(review): elided extract -- DISTANCE init, several braces/returns
   and loop-exit conditions are missing from this view.  */
13046 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13049 basic_block bb = BLOCK_FOR_INSN (insn);
13052 enum attr_type insn_type;
13054 if (insn != BB_HEAD (bb))
/* Phase 1: walk backwards within the block, bounded by the threshold.  */
13056 rtx prev = PREV_INSN (insn);
13057 while (prev && distance < LEA_SEARCH_THRESHOLD)
/* Scan the DF defs of PREV for a real (non-artificial) def of either
   register of interest.  */
13062 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13063 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13064 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13065 && (regno1 == DF_REF_REGNO (*def_rec)
13066 || regno2 == DF_REF_REGNO (*def_rec)))
13068 insn_type = get_attr_type (prev);
/* An LEA def is an AGU def -- stop; any other def is the answer.  */
13069 if (insn_type != TYPE_LEA)
13073 if (prev == BB_HEAD (bb))
13075 prev = PREV_INSN (prev);
/* Phase 2: if the block was exhausted, follow a simple-loop back edge
   (single predecessor that is also a successor) and keep scanning.  */
13079 if (distance < LEA_SEARCH_THRESHOLD)
13083 bool simple_loop = false;
13085 FOR_EACH_EDGE (e, ei, bb->preds)
13088 simple_loop = true;
13094 rtx prev = BB_END (bb);
13097 && distance < LEA_SEARCH_THRESHOLD)
13102 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13103 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13104 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13105 && (regno1 == DF_REF_REGNO (*def_rec)
13106 || regno2 == DF_REF_REGNO (*def_rec)))
13108 insn_type = get_attr_type (prev);
13109 if (insn_type != TYPE_LEA)
13113 prev = PREV_INSN (prev);
13121 /* get_attr_type may modify recog data.  We want to make sure
13122 that recog data is valid for instruction INSN, on which
13123 distance_non_agu_define is called.  INSN is unchanged here.  */
13124 extract_insn_cached (insn);
13128 /* Return the distance between INSN and the next insn that uses
13129 register number REGNO0 in memory address.  Return -1 if no such
13130 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.
   NOTE(review): elided extract -- DISTANCE init, braces/returns and
   loop-exit conditions are missing from this view.  Mirrors the
   structure of distance_non_agu_define, but scanning forward.  */
13133 distance_agu_use (unsigned int regno0, rtx insn)
13135 basic_block bb = BLOCK_FOR_INSN (insn);
13140 if (insn != BB_END (bb))
/* Phase 1: walk forwards within the block, bounded by the threshold.  */
13142 rtx next = NEXT_INSN (insn);
13143 while (next && distance < LEA_SEARCH_THRESHOLD)
/* A use of REGNO0 inside a memory address (load or store) is an AGU use.  */
13149 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13150 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13151 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13152 && regno0 == DF_REF_REGNO (*use_rec))
13154 /* Return DISTANCE if OP0 is used in memory
13155 address in NEXT.  */
/* A redefinition of REGNO0 kills the search.  */
13159 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13160 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13161 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13162 && regno0 == DF_REF_REGNO (*def_rec))
13164 /* Return -1 if OP0 is set in NEXT.  */
13168 if (next == BB_END (bb))
13170 next = NEXT_INSN (next);
/* Phase 2: follow a simple-loop forward edge and continue scanning.  */
13174 if (distance < LEA_SEARCH_THRESHOLD)
13178 bool simple_loop = false;
13180 FOR_EACH_EDGE (e, ei, bb->succs)
13183 simple_loop = true;
13189 rtx next = BB_HEAD (bb);
13192 && distance < LEA_SEARCH_THRESHOLD)
13198 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13199 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13200 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13201 && regno0 == DF_REF_REGNO (*use_rec))
13203 /* Return DISTANCE if OP0 is used in memory
13204 address in NEXT.  */
13208 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13209 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13210 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13211 && regno0 == DF_REF_REGNO (*def_rec))
13213 /* Return -1 if OP0 is set in NEXT.  */
13218 next = NEXT_INSN (next);
13226 /* Define this macro to tune LEA priority vs ADD, it take effect when
13227 there is a dilemma of choicing LEA or ADD
13228 Negative value: ADD is more preferred than LEA
13230 Positive value: LEA is more preferred than ADD*/
13231 #define IX86_LEA_PRIORITY 2
13233 /* Return true if it is ok to optimize an ADD operation to LEA
13234 operation to avoid flag register consumation. For the processors
13235 like ATOM, if the destination register of LEA holds an actual
13236 address which will be used soon, LEA is better and otherwise ADD
13240 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13241 rtx insn, rtx operands[])
13243 unsigned int regno0 = true_regnum (operands[0]);
13244 unsigned int regno1 = true_regnum (operands[1]);
13245 unsigned int regno2;
13247 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13248 return regno0 != regno1;
13250 regno2 = true_regnum (operands[2]);
13252 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13253 if (regno0 != regno1 && regno0 != regno2)
13257 int dist_define, dist_use;
13258 dist_define = distance_non_agu_define (regno1, regno2, insn);
13259 if (dist_define <= 0)
13262 /* If this insn has both backward non-agu dependence and forward
13263 agu dependence, the one with short distance take effect. */
13264 dist_use = distance_agu_use (regno0, insn);
13266 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13273 /* Return true if destination reg of SET_BODY is shift count of
   USE_BODY (recursing into PARALLEL bodies on both sides).
   NOTE(review): elided extract -- declarations of SET_DEST/SHIFT_RTX/I
   and several returns/braces are missing from this view.  */
13277 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13283 /* Retrieve destination of SET_BODY.  */
13284 switch (GET_CODE (set_body))
13287 set_dest = SET_DEST (set_body);
13288 if (!set_dest || !REG_P (set_dest))
/* PARALLEL: any contained element may establish the dependence.  */
13292 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13293 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13301 /* Retrieve shift count of USE_BODY.  */
13302 switch (GET_CODE (use_body))
13305 shift_rtx = XEXP (use_body, 1);
13308 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13309 if (ix86_dep_by_shift_count_body (set_body,
13310 XVECEXP (use_body, 0, i)))
/* Only shift/rotate codes have a count operand to compare against.  */
13318 && (GET_CODE (shift_rtx) == ASHIFT
13319 || GET_CODE (shift_rtx) == LSHIFTRT
13320 || GET_CODE (shift_rtx) == ASHIFTRT
13321 || GET_CODE (shift_rtx) == ROTATE
13322 || GET_CODE (shift_rtx) == ROTATERT))
13324 rtx shift_count = XEXP (shift_rtx, 1);
13326 /* Return true if shift count is dest of SET_BODY.  */
13327 if (REG_P (shift_count)
13328 && true_regnum (set_dest) == true_regnum (shift_count))
13335 /* Return true if destination reg of SET_INSN is shift count of
   USE_INSN.  Thin wrapper that extracts the insn patterns.  */
13339 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13341 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13342 PATTERN (use_insn));
13345 /* Return TRUE or FALSE depending on whether the unary operator meets the
13346 appropriate constraints.  */
13349 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13350 enum machine_mode mode ATTRIBUTE_UNUSED,
13351 rtx operands[2] ATTRIBUTE_UNUSED)
13353 /* If one of operands is memory, source and destination must match.  */
13354 if ((MEM_P (operands[0])
13355 || MEM_P (operands[1]))
13356 && ! rtx_equal_p (operands[0], operands[1]))
13361 /* Post-reload splitter for converting an SF or DFmode value in an
13362 SSE register into an unsigned SImode.
   Operands: [0] result, [1] scratch LARGE, [2] scratch ZERO_OR_TWO31,
   [3] INPUT, [4] the constant 2**31 (TWO31).  The value is conditionally
   biased down by 2**31, converted, then the sign bit is restored by XOR.
   NOTE(review): elided extract -- some guards/braces are missing.  */
13365 ix86_split_convert_uns_si_sse (rtx operands[])
13367 enum machine_mode vecmode;
13368 rtx value, large, zero_or_two31, input, two31, x;
13370 large = operands[1];
13371 zero_or_two31 = operands[2];
13372 input = operands[3];
13373 two31 = operands[4];
13374 vecmode = GET_MODE (large);
13375 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13377 /* Load up the value into the low element.  We must ensure that the other
13378 elements are valid floats -- zero is the easiest such value.  */
/* MEM input path (guard elided): insert into element 0 of a zero vector.  */
13381 if (vecmode == V4SFmode)
13382 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13384 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input path: zero VALUE, then move the scalar into its low part.  */
13388 input = gen_rtx_REG (vecmode, REGNO (input));
13389 emit_move_insn (value, CONST0_RTX (vecmode));
13390 if (vecmode == V4SFmode)
13391 emit_insn (gen_sse_movss (value, value, input));
13393 emit_insn (gen_sse2_movsd (value, value, input));
13396 emit_move_insn (large, two31);
13397 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* LARGE := (2**31 <= VALUE) mask, element-wise.  */
13399 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13400 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* ZERO_OR_TWO31 := TWO31 where VALUE >= 2**31, else 0.  */
13402 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13403 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
/* VALUE -= (0 or 2**31), bringing it into signed-convertible range.  */
13405 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13406 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into just the sign bit (bit 31) ...  */
13408 large = gen_rtx_REG (V4SImode, REGNO (large));
13409 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13411 x = gen_rtx_REG (V4SImode, REGNO (value));
13412 if (vecmode == V4SFmode)
13413 emit_insn (gen_sse2_cvttps2dq (x, value));
13415 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* ... and XOR it back in to undo the earlier 2**31 bias.  */
13418 emit_insn (gen_xorv4si3 (value, value, large));
13421 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13422 Expects the 64-bit DImode to be supplied in a pair of integral
13423 registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
13424 -mfpmath=sse, !optimize_size only.
   NOTE(review): elided extract -- X declaration and some braces missing.  */
13427 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13429 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13430 rtx int_xmm, fp_xmm;
13431 rtx biases, exponents;
/* Get the 64-bit integer into an XMM register, by the cheapest path the
   target supports.  */
13434 int_xmm = gen_reg_rtx (V4SImode);
13435 if (TARGET_INTER_UNIT_MOVES)
13436 emit_insn (gen_movdi_to_sse (int_xmm, input));
13437 else if (TARGET_SSE_SPLIT_REGS)
13439 emit_clobber (int_xmm);
13440 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13444 x = gen_reg_rtx (V2DImode);
13445 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13446 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Constant-pool vector of the two DF exponent words (2**52 and 2**84).  */
13449 x = gen_rtx_CONST_VECTOR (V4SImode,
13450 gen_rtvec (4, GEN_INT (0x43300000UL),
13451 GEN_INT (0x45300000UL),
13452 const0_rtx, const0_rtx))
13453 exponents = validize_mem (force_const_mem (V4SImode, x));
13455 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13456 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13458 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13459 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13460 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13461 (0x1.0p84 + double(fp_value_hi_xmm)).
13462 Note these exponents differ by 32.  */
13464 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13466 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13467 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
13468 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13469 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13470 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13471 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13472 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13473 biases = validize_mem (force_const_mem (V2DFmode, biases));
13474 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13476 /* Add the upper and lower DFmode values together.  */
/* SSE3: single horizontal add; otherwise unpack-high + vertical add.  */
13478 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13481 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13482 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13483 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13486 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13489 /* Not used, but eases macroization of patterns.  */
13491 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13492 rtx input ATTRIBUTE_UNUSED)
/* Must never be reached; exists only so pattern macros can reference it.  */
13494 gcc_unreachable ();
13497 /* Convert an unsigned SImode value into a DFmode.  Only currently used
13498 for SSE, but applicable anywhere.
   Strategy: flip the sign bit (subtract 2**31 in two's complement),
   convert as signed, then add 2**31.0 back in DFmode.  */
13501 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13503 REAL_VALUE_TYPE TWO31r;
/* x := input - 2**31 (as a signed value), via adding INT_MIN.  */
13506 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13507 NULL, 1, OPTAB_DIRECT)
13509 fp = gen_reg_rtx (DFmode);
13510 emit_insn (gen_floatsidf2 (fp, x));
/* Add the 2**31.0 bias back, exactly representable in DFmode.  */
13512 real_ldexp (&TWO31r, &dconst1, 31);
13513 x = const_double_from_real_value (TWO31r, DFmode);
13515 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13517 emit_move_insn (target, x);
13520 /* Convert a signed DImode value into a DFmode.  Only used for SSE in
13521 32-bit mode; otherwise we have a direct convert instruction.
   Split INPUT into high (signed) and low (unsigned) SImode halves,
   convert each, and combine as hi * 2**32 + lo.  */
13524 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13526 REAL_VALUE_TYPE TWO32r;
13527 rtx fp_lo, fp_hi, x;
13529 fp_lo = gen_reg_rtx (DFmode);
13530 fp_hi = gen_reg_rtx (DFmode);
/* High half carries the sign: plain signed SI->DF conversion.  */
13532 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13534 real_ldexp (&TWO32r, &dconst1, 32);
13535 x = const_double_from_real_value (TWO32r, DFmode);
13536 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
/* Low half is unsigned: use the dedicated unsigned SI->DF expansion.  */
13538 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13540 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13543 emit_move_insn (target, x);
13546 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13547 For x86_32, -mfpmath=sse, !optimize_size only.
   Split into 16-bit halves so each converts exactly as signed, then
   recombine as hi * 2**16 + lo.  */
13549 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13551 REAL_VALUE_TYPE ONE16r;
13552 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13554 real_ldexp (&ONE16r, &dconst1, 16);
13555 x = const_double_from_real_value (ONE16r, SFmode);
/* int_lo = input & 0xffff; int_hi = input >> 16.  */
13556 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13557 NULL, 0, OPTAB_DIRECT)
13558 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13559 NULL, 0, OPTAB_DIRECT)
13560 fp_hi = gen_reg_rtx (SFmode);
13561 fp_lo = gen_reg_rtx (SFmode);
13562 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13563 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
/* result = fp_hi * 2**16 + fp_lo, computed into TARGET when possible.  */
13564 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13566 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13568 if (!rtx_equal_p (target, fp_hi))
13569 emit_move_insn (target, fp_hi);
13572 /* A subroutine of ix86_build_signbit_mask_vector.  If VECT is true,
13573 then replicate the value for all elements of the vector
   register; otherwise place VALUE in element 0 and zero-fill the rest
   (for float modes).  NOTE(review): elided extract -- the mode switch
   labels and V declaration are missing from this view.  */
13577 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* SImode element case.  */
13584 v = gen_rtvec (4, value, value, value, value);
13585 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* DImode element case.  */
13589 v = gen_rtvec (2, value, value);
13590 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* SFmode: replicate if VECT, else VALUE in lane 0 with zero fill.  */
13594 v = gen_rtvec (4, value, value, value, value);
13596 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13597 CONST0_RTX (SFmode), CONST0_RTX (SFmode))
13598 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* DFmode: same pattern with two lanes.  */
13602 v = gen_rtvec (2, value, value);
13604 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13605 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13608 gcc_unreachable ();
13612 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13613 and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
13614 for an SSE register.  If VECT is true, then replicate the mask for
13615 all elements of the vector register.  If INVERT is true, then create
13616 a mask excluding the sign bit.
   NOTE(review): elided extract -- mode switch labels, SHIFT/IMODE setup
   and several braces are missing from this view.  */
13619 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13621 enum machine_mode vec_mode, imode;
13622 HOST_WIDE_INT hi, lo;
13627 /* Find the sign bit, sign extended to 2*HWI.  */
/* SI/SF: the sign bit is bit 31.  */
13633 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13634 lo = 0x80000000, hi = lo < 0;
/* DI/DF: sign bit may not fit in one HOST_WIDE_INT word.  */
13640 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13641 if (HOST_BITS_PER_WIDE_INT >= 64)
13642 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13644 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TF (128-bit) case: no vector mode, build the mask via V2DImode below.  */
13649 vec_mode = VOIDmode;
13650 if (HOST_BITS_PER_WIDE_INT >= 64)
13653 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13660 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13664 lo = ~lo, hi = ~hi;
13670 mask = immed_double_const (lo, hi, imode);
13672 vec = gen_rtvec (2, v, mask);
13673 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13674 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13681 gcc_unreachable ();
/* INVERT: complement to get an "everything but the sign bit" mask.  */
13685 lo = ~lo, hi = ~hi;
13687 /* Force this value into the low part of a fp vector constant.  */
13688 mask = immed_double_const (lo, hi, imode);
13689 mask = gen_lowpart (mode, mask);
13691 if (vec_mode == VOIDmode)
13692 return force_reg (mode, mask);
13694 v = ix86_build_const_vector (mode, vect, mask);
13695 return force_reg (vec_mode, v);
13698 /* Generate code for floating point ABS or NEG. */
13701 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13704 rtx mask, set, use, clob, dst, src;
13705 bool use_sse = false;
13706 bool vector_mode = VECTOR_MODE_P (mode);
13707 enum machine_mode elt_mode = mode;
13711 elt_mode = GET_MODE_INNER (mode);
13714 else if (mode == TFmode)
13716 else if (TARGET_SSE_MATH)
13717 use_sse = SSE_FLOAT_MODE_P (mode);
13719 /* NEG and ABS performed with SSE use bitwise mask operations.
13720 Create the appropriate mask now. */
13722 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13731 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13732 set = gen_rtx_SET (VOIDmode, dst, set);
13737 set = gen_rtx_fmt_e (code, mode, src);
13738 set = gen_rtx_SET (VOIDmode, dst, set);
13741 use = gen_rtx_USE (VOIDmode, mask);
13742 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13743 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13744 gen_rtvec (3, set, use, clob)));
13751 /* Expand a copysign operation. Special case operand 0 being a constant. */
13754 ix86_expand_copysign (rtx operands[])
13756 enum machine_mode mode;
13757 rtx dest, op0, op1, mask, nmask;
13759 dest = operands[0];
13763 mode = GET_MODE (dest);
13765 if (GET_CODE (op0) == CONST_DOUBLE)
13767 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13769 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13770 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13772 if (mode == SFmode || mode == DFmode)
13774 enum machine_mode vmode;
13776 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13778 if (op0 == CONST0_RTX (mode))
13779 op0 = CONST0_RTX (vmode);
13784 if (mode == SFmode)
13785 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13786 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13788 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13790 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13793 else if (op0 != CONST0_RTX (mode))
13794 op0 = force_reg (mode, op0);
13796 mask = ix86_build_signbit_mask (mode, 0, 0);
13798 if (mode == SFmode)
13799 copysign_insn = gen_copysignsf3_const;
13800 else if (mode == DFmode)
13801 copysign_insn = gen_copysigndf3_const;
13803 copysign_insn = gen_copysigntf3_const;
13805 emit_insn (copysign_insn (dest, op0, op1, mask));
13809 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13811 nmask = ix86_build_signbit_mask (mode, 0, 1);
13812 mask = ix86_build_signbit_mask (mode, 0, 0);
13814 if (mode == SFmode)
13815 copysign_insn = gen_copysignsf3_var;
13816 else if (mode == DFmode)
13817 copysign_insn = gen_copysigndf3_var;
13819 copysign_insn = gen_copysigntf3_var;
13821 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13825 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13826 be a constant, and so has already been expanded into a vector constant. */
13829 ix86_split_copysign_const (rtx operands[])
13831 enum machine_mode mode, vmode;
13832 rtx dest, op0, op1, mask, x;
13834 dest = operands[0];
13837 mask = operands[3];
13839 mode = GET_MODE (dest);
13840 vmode = GET_MODE (mask);
13842 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13843 x = gen_rtx_AND (vmode, dest, mask);
13844 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13846 if (op0 != CONST0_RTX (vmode))
13848 x = gen_rtx_IOR (vmode, dest, op0);
13849 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13853 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13854 so we have to do two masks. */
13857 ix86_split_copysign_var (rtx operands[])
13859 enum machine_mode mode, vmode;
13860 rtx dest, scratch, op0, op1, mask, nmask, x;
13862 dest = operands[0];
13863 scratch = operands[1];
13866 nmask = operands[4];
13867 mask = operands[5];
13869 mode = GET_MODE (dest);
13870 vmode = GET_MODE (mask);
13872 if (rtx_equal_p (op0, op1))
13874 /* Shouldn't happen often (it's useless, obviously), but when it does
13875 we'd generate incorrect code if we continue below. */
13876 emit_move_insn (dest, op0);
13880 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13882 gcc_assert (REGNO (op1) == REGNO (scratch));
13884 x = gen_rtx_AND (vmode, scratch, mask);
13885 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13888 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13889 x = gen_rtx_NOT (vmode, dest);
13890 x = gen_rtx_AND (vmode, x, op0);
13891 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13895 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13897 x = gen_rtx_AND (vmode, scratch, mask);
13899 else /* alternative 2,4 */
13901 gcc_assert (REGNO (mask) == REGNO (scratch));
13902 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13903 x = gen_rtx_AND (vmode, scratch, op1);
13905 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13907 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13909 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13910 x = gen_rtx_AND (vmode, dest, nmask);
13912 else /* alternative 3,4 */
13914 gcc_assert (REGNO (nmask) == REGNO (dest));
13916 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13917 x = gen_rtx_AND (vmode, dest, op0);
13919 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13922 x = gen_rtx_IOR (vmode, dest, scratch);
13923 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13926 /* Return TRUE or FALSE depending on whether the first SET in INSN
13927 has source and destination with matching CC modes, and that the
13928 CC mode is at least as constrained as REQ_MODE. */
13931 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13934 enum machine_mode set_mode;
13936 set = PATTERN (insn);
13937 if (GET_CODE (set) == PARALLEL)
13938 set = XVECEXP (set, 0, 0);
13939 gcc_assert (GET_CODE (set) == SET);
13940 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13942 set_mode = GET_MODE (SET_DEST (set));
13946 if (req_mode != CCNOmode
13947 && (req_mode != CCmode
13948 || XEXP (SET_SRC (set), 1) != const0_rtx))
13952 if (req_mode == CCGCmode)
13956 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13960 if (req_mode == CCZmode)
13971 gcc_unreachable ();
13974 return (GET_MODE (SET_SRC (set)) == set_mode);
13977 /* Generate insn patterns to do an integer compare of OPERANDS. */
13980 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13982 enum machine_mode cmpmode;
13985 cmpmode = SELECT_CC_MODE (code, op0, op1);
13986 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13988 /* This is very simple, but making the interface the same as in the
13989 FP case makes the rest of the code easier. */
13990 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13991 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13993 /* Return the test that should be put into the flags user, i.e.
13994 the bcc, scc, or cmov instruction. */
13995 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13998 /* Figure out whether to use ordered or unordered fp comparisons.
13999 Return the appropriate mode to use. */
14002 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14004 /* ??? In order to make all comparisons reversible, we do all comparisons
14005 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14006 all forms trapping and nontrapping comparisons, we can make inequality
14007 comparisons trapping again, since it results in better code when using
14008 FCOM based compares. */
14009 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14013 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14015 enum machine_mode mode = GET_MODE (op0);
14017 if (SCALAR_FLOAT_MODE_P (mode))
14019 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14020 return ix86_fp_compare_mode (code);
14025 /* Only zero flag is needed. */
14026 case EQ: /* ZF=0 */
14027 case NE: /* ZF!=0 */
14029 /* Codes needing carry flag. */
14030 case GEU: /* CF=0 */
14031 case LTU: /* CF=1 */
14032 /* Detect overflow checks. They need just the carry flag. */
14033 if (GET_CODE (op0) == PLUS
14034 && rtx_equal_p (op1, XEXP (op0, 0)))
14038 case GTU: /* CF=0 & ZF=0 */
14039 case LEU: /* CF=1 | ZF=1 */
14040 /* Detect overflow checks. They need just the carry flag. */
14041 if (GET_CODE (op0) == MINUS
14042 && rtx_equal_p (op1, XEXP (op0, 0)))
14046 /* Codes possibly doable only with sign flag when
14047 comparing against zero. */
14048 case GE: /* SF=OF or SF=0 */
14049 case LT: /* SF<>OF or SF=1 */
14050 if (op1 == const0_rtx)
14053 /* For other cases Carry flag is not required. */
14055 /* Codes doable only with sign flag when comparing
14056 against zero, but we miss jump instruction for it
14057 so we need to use relational tests against overflow
14058 that thus needs to be zero. */
14059 case GT: /* ZF=0 & SF=OF */
14060 case LE: /* ZF=1 | SF<>OF */
14061 if (op1 == const0_rtx)
14065 /* strcmp pattern do (use flags) and combine may ask us for proper
14070 gcc_unreachable ();
14074 /* Return the fixed registers used for condition codes. */
14077 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14084 /* If two condition code modes are compatible, return a condition code
14085 mode which is compatible with both. Otherwise, return
14088 static enum machine_mode
14089 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14094 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14097 if ((m1 == CCGCmode && m2 == CCGOCmode)
14098 || (m1 == CCGOCmode && m2 == CCGCmode))
14104 gcc_unreachable ();
14134 /* These are only compatible with themselves, which we already
14140 /* Split comparison code CODE into comparisons we can do using branch
14141 instructions. BYPASS_CODE is comparison code for branch that will
14142 branch around FIRST_CODE and SECOND_CODE. If some of branches
14143 is not required, set value to UNKNOWN.
14144 We never require more than two branches. */
14147 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
14148 enum rtx_code *first_code,
14149 enum rtx_code *second_code)
14151 *first_code = code;
14152 *bypass_code = UNKNOWN;
14153 *second_code = UNKNOWN;
14155 /* The fcomi comparison sets flags as follows:
14165 case GT: /* GTU - CF=0 & ZF=0 */
14166 case GE: /* GEU - CF=0 */
14167 case ORDERED: /* PF=0 */
14168 case UNORDERED: /* PF=1 */
14169 case UNEQ: /* EQ - ZF=1 */
14170 case UNLT: /* LTU - CF=1 */
14171 case UNLE: /* LEU - CF=1 | ZF=1 */
14172 case LTGT: /* EQ - ZF=0 */
14174 case LT: /* LTU - CF=1 - fails on unordered */
14175 *first_code = UNLT;
14176 *bypass_code = UNORDERED;
14178 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
14179 *first_code = UNLE;
14180 *bypass_code = UNORDERED;
14182 case EQ: /* EQ - ZF=1 - fails on unordered */
14183 *first_code = UNEQ;
14184 *bypass_code = UNORDERED;
14186 case NE: /* NE - ZF=0 - fails on unordered */
14187 *first_code = LTGT;
14188 *second_code = UNORDERED;
14190 case UNGE: /* GEU - CF=0 - fails on unordered */
14192 *second_code = UNORDERED;
14194 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
14196 *second_code = UNORDERED;
14199 gcc_unreachable ();
14201 if (!TARGET_IEEE_FP)
14203 *second_code = UNKNOWN;
14204 *bypass_code = UNKNOWN;
14208 /* Return cost of comparison done fcom + arithmetics operations on AX.
14209 All following functions do use number of instructions as a cost metrics.
14210 In future this should be tweaked to compute bytes for optimize_size and
14211 take into account performance of various instructions on various CPUs. */
14213 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
14215 if (!TARGET_IEEE_FP)
14217 /* The cost of code output by ix86_expand_fp_compare. */
14241 gcc_unreachable ();
14245 /* Return cost of comparison done using fcomi operation.
14246 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14248 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
14250 enum rtx_code bypass_code, first_code, second_code;
14251 /* Return arbitrarily high cost when instruction is not supported - this
14252 prevents gcc from using it. */
14255 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14256 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14259 /* Return cost of comparison done using sahf operation.
14260 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14262 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14264 enum rtx_code bypass_code, first_code, second_code;
14265 /* Return arbitrarily high cost when instruction is not preferred - this
14266 avoids gcc from using it. */
14267 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14270 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14273 /* Compute cost of the comparison done using any method.
14274 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14276 ix86_fp_comparison_cost (enum rtx_code code)
14278 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14281 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14282 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14284 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14285 if (min > sahf_cost)
14287 if (min > fcomi_cost)
14292 /* Return true if we should use an FCOMI instruction for this
14296 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14298 enum rtx_code swapped_code = swap_condition (code);
14300 return ((ix86_fp_comparison_cost (code)
14301 == ix86_fp_comparison_fcomi_cost (code))
14302 || (ix86_fp_comparison_cost (swapped_code)
14303 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14306 /* Swap, force into registers, or otherwise massage the two operands
14307 to a fp comparison. The operands are updated in place; the new
14308 comparison code is returned. */
14310 static enum rtx_code
14311 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14313 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14314 rtx op0 = *pop0, op1 = *pop1;
14315 enum machine_mode op_mode = GET_MODE (op0);
14316 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14318 /* All of the unordered compare instructions only work on registers.
14319 The same is true of the fcomi compare instructions. The XFmode
14320 compare instructions require registers except when comparing
14321 against zero or when converting operand 1 from fixed point to
14325 && (fpcmp_mode == CCFPUmode
14326 || (op_mode == XFmode
14327 && ! (standard_80387_constant_p (op0) == 1
14328 || standard_80387_constant_p (op1) == 1)
14329 && GET_CODE (op1) != FLOAT)
14330 || ix86_use_fcomi_compare (code)))
14332 op0 = force_reg (op_mode, op0);
14333 op1 = force_reg (op_mode, op1);
14337 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14338 things around if they appear profitable, otherwise force op0
14339 into a register. */
14341 if (standard_80387_constant_p (op0) == 0
14343 && ! (standard_80387_constant_p (op1) == 0
14347 tmp = op0, op0 = op1, op1 = tmp;
14348 code = swap_condition (code);
14352 op0 = force_reg (op_mode, op0);
14354 if (CONSTANT_P (op1))
14356 int tmp = standard_80387_constant_p (op1);
14358 op1 = validize_mem (force_const_mem (op_mode, op1));
14362 op1 = force_reg (op_mode, op1);
14365 op1 = force_reg (op_mode, op1);
14369 /* Try to rearrange the comparison to make it cheaper. */
14370 if (ix86_fp_comparison_cost (code)
14371 > ix86_fp_comparison_cost (swap_condition (code))
14372 && (REG_P (op1) || can_create_pseudo_p ()))
14375 tmp = op0, op0 = op1, op1 = tmp;
14376 code = swap_condition (code);
14378 op0 = force_reg (op_mode, op0);
14386 /* Convert comparison codes we use to represent FP comparison to integer
14387 code that will result in proper branch. Return UNKNOWN if no such code
14391 ix86_fp_compare_code_to_integer (enum rtx_code code)
14420 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14423 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14424 rtx *second_test, rtx *bypass_test)
14426 enum machine_mode fpcmp_mode, intcmp_mode;
14428 int cost = ix86_fp_comparison_cost (code);
14429 enum rtx_code bypass_code, first_code, second_code;
14431 fpcmp_mode = ix86_fp_compare_mode (code);
14432 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14435 *second_test = NULL_RTX;
14437 *bypass_test = NULL_RTX;
14439 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14441 /* Do fcomi/sahf based test when profitable. */
14442 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14443 && (bypass_code == UNKNOWN || bypass_test)
14444 && (second_code == UNKNOWN || second_test))
14446 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14447 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14453 gcc_assert (TARGET_SAHF);
14456 scratch = gen_reg_rtx (HImode);
14457 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14459 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14462 /* The FP codes work out to act like unsigned. */
14463 intcmp_mode = fpcmp_mode;
14465 if (bypass_code != UNKNOWN)
14466 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14467 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14469 if (second_code != UNKNOWN)
14470 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14471 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14476 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14477 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14478 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14480 scratch = gen_reg_rtx (HImode);
14481 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14483 /* In the unordered case, we have to check C2 for NaN's, which
14484 doesn't happen to work out to anything nice combination-wise.
14485 So do some bit twiddling on the value we've got in AH to come
14486 up with an appropriate set of condition codes. */
14488 intcmp_mode = CCNOmode;
14493 if (code == GT || !TARGET_IEEE_FP)
14495 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14500 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14501 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14502 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14503 intcmp_mode = CCmode;
14509 if (code == LT && TARGET_IEEE_FP)
14511 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14512 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14513 intcmp_mode = CCmode;
14518 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14524 if (code == GE || !TARGET_IEEE_FP)
14526 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14531 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14532 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14539 if (code == LE && TARGET_IEEE_FP)
14541 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14542 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14543 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14544 intcmp_mode = CCmode;
14549 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14555 if (code == EQ && TARGET_IEEE_FP)
14557 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14558 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14559 intcmp_mode = CCmode;
14564 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14571 if (code == NE && TARGET_IEEE_FP)
14573 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14574 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14580 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14586 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14595 gcc_unreachable ();
14599 /* Return the test that should be put into the flags user, i.e.
14600 the bcc, scc, or cmov instruction. */
14601 return gen_rtx_fmt_ee (code, VOIDmode,
14602 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14607 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14610 op0 = ix86_compare_op0;
14611 op1 = ix86_compare_op1;
14614 *second_test = NULL_RTX;
14616 *bypass_test = NULL_RTX;
14618 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14619 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14621 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14623 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14624 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14625 second_test, bypass_test);
14628 ret = ix86_expand_int_compare (code, op0, op1);
14633 /* Return true if the CODE will result in nontrivial jump sequence. */
14635 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14637 enum rtx_code bypass_code, first_code, second_code;
14640 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14641 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14645 ix86_expand_branch (enum rtx_code code, rtx label)
14649 switch (GET_MODE (ix86_compare_op0))
14655 tmp = ix86_expand_compare (code, NULL, NULL);
14656 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14657 gen_rtx_LABEL_REF (VOIDmode, label),
14659 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14668 enum rtx_code bypass_code, first_code, second_code;
14670 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14671 &ix86_compare_op1);
14673 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14675 /* Check whether we will use the natural sequence with one jump. If
14676 so, we can expand jump early. Otherwise delay expansion by
14677 creating compound insn to not confuse optimizers. */
14678 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14680 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14681 gen_rtx_LABEL_REF (VOIDmode, label),
14682 pc_rtx, NULL_RTX, NULL_RTX);
14686 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14687 ix86_compare_op0, ix86_compare_op1);
14688 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14689 gen_rtx_LABEL_REF (VOIDmode, label),
14691 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14693 use_fcomi = ix86_use_fcomi_compare (code);
14694 vec = rtvec_alloc (3 + !use_fcomi);
14695 RTVEC_ELT (vec, 0) = tmp;
14697 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14699 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14702 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14704 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14713 /* Expand DImode branch into multiple compare+branch. */
14715 rtx lo[2], hi[2], label2;
14716 enum rtx_code code1, code2, code3;
14717 enum machine_mode submode;
14719 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14721 tmp = ix86_compare_op0;
14722 ix86_compare_op0 = ix86_compare_op1;
14723 ix86_compare_op1 = tmp;
14724 code = swap_condition (code);
14726 if (GET_MODE (ix86_compare_op0) == DImode)
14728 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14729 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14734 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14735 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14739 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14740 avoid two branches. This costs one extra insn, so disable when
14741 optimizing for size. */
14743 if ((code == EQ || code == NE)
14744 && (!optimize_insn_for_size_p ()
14745 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14750 if (hi[1] != const0_rtx)
14751 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14752 NULL_RTX, 0, OPTAB_WIDEN);
14755 if (lo[1] != const0_rtx)
14756 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14757 NULL_RTX, 0, OPTAB_WIDEN);
14759 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14760 NULL_RTX, 0, OPTAB_WIDEN);
14762 ix86_compare_op0 = tmp;
14763 ix86_compare_op1 = const0_rtx;
14764 ix86_expand_branch (code, label);
14768 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14769 op1 is a constant and the low word is zero, then we can just
14770 examine the high word. Similarly for low word -1 and
14771 less-or-equal-than or greater-than. */
14773 if (CONST_INT_P (hi[1]))
14776 case LT: case LTU: case GE: case GEU:
14777 if (lo[1] == const0_rtx)
14779 ix86_compare_op0 = hi[0];
14780 ix86_compare_op1 = hi[1];
14781 ix86_expand_branch (code, label);
14785 case LE: case LEU: case GT: case GTU:
14786 if (lo[1] == constm1_rtx)
14788 ix86_compare_op0 = hi[0];
14789 ix86_compare_op1 = hi[1];
14790 ix86_expand_branch (code, label);
14798 /* Otherwise, we need two or three jumps. */
14800 label2 = gen_label_rtx ();
14803 code2 = swap_condition (code);
14804 code3 = unsigned_condition (code);
14808 case LT: case GT: case LTU: case GTU:
14811 case LE: code1 = LT; code2 = GT; break;
14812 case GE: code1 = GT; code2 = LT; break;
14813 case LEU: code1 = LTU; code2 = GTU; break;
14814 case GEU: code1 = GTU; code2 = LTU; break;
14816 case EQ: code1 = UNKNOWN; code2 = NE; break;
14817 case NE: code2 = UNKNOWN; break;
14820 gcc_unreachable ();
14825 * if (hi(a) < hi(b)) goto true;
14826 * if (hi(a) > hi(b)) goto false;
14827 * if (lo(a) < lo(b)) goto true;
14831 ix86_compare_op0 = hi[0];
14832 ix86_compare_op1 = hi[1];
14834 if (code1 != UNKNOWN)
14835 ix86_expand_branch (code1, label);
14836 if (code2 != UNKNOWN)
14837 ix86_expand_branch (code2, label2);
14839 ix86_compare_op0 = lo[0];
14840 ix86_compare_op1 = lo[1];
14841 ix86_expand_branch (code3, label);
14843 if (code2 != UNKNOWN)
14844 emit_label (label2);
14849 /* If we have already emitted a compare insn, go straight to simple.
14850 ix86_expand_compare won't emit anything if ix86_compare_emitted
14852 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
14857 /* Split branch based on floating point condition. */
14859 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14860 rtx target1, rtx target2, rtx tmp, rtx pushed)
14862 rtx second, bypass;
14863 rtx label = NULL_RTX;
14865 int bypass_probability = -1, second_probability = -1, probability = -1;
14868 if (target2 != pc_rtx)
14871 code = reverse_condition_maybe_unordered (code);
14876 condition = ix86_expand_fp_compare (code, op1, op2,
14877 tmp, &second, &bypass);
14879 /* Remove pushed operand from stack. */
14881 ix86_free_from_memory (GET_MODE (pushed));
14883 if (split_branch_probability >= 0)
14885 /* Distribute the probabilities across the jumps.
14886 Assume the BYPASS and SECOND to be always test
14888 probability = split_branch_probability;
14890 /* Value of 1 is low enough to make no need for probability
14891 to be updated. Later we may run some experiments and see
14892 if unordered values are more frequent in practice. */
14894 bypass_probability = 1;
14896 second_probability = 1;
14898 if (bypass != NULL_RTX)
14900 label = gen_label_rtx ();
14901 i = emit_jump_insn (gen_rtx_SET
14903 gen_rtx_IF_THEN_ELSE (VOIDmode,
14905 gen_rtx_LABEL_REF (VOIDmode,
14908 if (bypass_probability >= 0)
14909 add_reg_note (i, REG_BR_PROB, GEN_INT (bypass_probability));
14911 i = emit_jump_insn (gen_rtx_SET
14913 gen_rtx_IF_THEN_ELSE (VOIDmode,
14914 condition, target1, target2)));
14915 if (probability >= 0)
14916 add_reg_note (i, REG_BR_PROB, GEN_INT (probability));
14917 if (second != NULL_RTX)
14919 i = emit_jump_insn (gen_rtx_SET
14921 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14923 if (second_probability >= 0)
14924 add_reg_note (i, REG_BR_PROB, GEN_INT (second_probability));
14926 if (label != NULL_RTX)
14927 emit_label (label);
14931 ix86_expand_setcc (enum rtx_code code, rtx dest)
14933 rtx ret, tmp, tmpreg, equiv;
14934 rtx second_test, bypass_test;
14936 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14937 return 0; /* FAIL */
14939 gcc_assert (GET_MODE (dest) == QImode);
14941 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14942 PUT_MODE (ret, QImode);
14947 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14948 if (bypass_test || second_test)
14950 rtx test = second_test;
14952 rtx tmp2 = gen_reg_rtx (QImode);
14955 gcc_assert (!second_test);
14956 test = bypass_test;
14958 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14960 PUT_MODE (test, QImode);
14961 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14964 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14966 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14969 /* Attach a REG_EQUAL note describing the comparison result. */
14970 if (ix86_compare_op0 && ix86_compare_op1)
14972 equiv = simplify_gen_relational (code, QImode,
14973 GET_MODE (ix86_compare_op0),
14974 ix86_compare_op0, ix86_compare_op1);
14975 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14978 return 1; /* DONE */
14981 /* Expand comparison setting or clearing carry flag. Return true when
14982 successful and set pop for the operation. */
14984 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14986 enum machine_mode mode =
14987 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14989 /* Do not handle DImode compares that go through special path. */
14990 if (mode == (TARGET_64BIT ? TImode : DImode))
14993 if (SCALAR_FLOAT_MODE_P (mode))
14995 rtx second_test = NULL, bypass_test = NULL;
14996 rtx compare_op, compare_seq;
14998 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15000 /* Shortcut: following common codes never translate
15001 into carry flag compares. */
15002 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15003 || code == ORDERED || code == UNORDERED)
15006 /* These comparisons require zero flag; swap operands so they won't. */
15007 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15008 && !TARGET_IEEE_FP)
15013 code = swap_condition (code);
15016 /* Try to expand the comparison and verify that we end up with
15017 carry flag based comparison. This fails to be true only when
15018 we decide to expand comparison using arithmetic that is not
15019 too common scenario. */
15021 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
15022 &second_test, &bypass_test);
15023 compare_seq = get_insns ();
15026 if (second_test || bypass_test)
15029 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15030 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15031 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15033 code = GET_CODE (compare_op);
15035 if (code != LTU && code != GEU)
15038 emit_insn (compare_seq);
15043 if (!INTEGRAL_MODE_P (mode))
15052 /* Convert a==0 into (unsigned)a<1. */
15055 if (op1 != const0_rtx)
15058 code = (code == EQ ? LTU : GEU);
15061 /* Convert a>b into b<a or a>=b-1. */
15064 if (CONST_INT_P (op1))
15066 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15067 /* Bail out on overflow. We still can swap operands but that
15068 would force loading of the constant into register. */
15069 if (op1 == const0_rtx
15070 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15072 code = (code == GTU ? GEU : LTU);
15079 code = (code == GTU ? LTU : GEU);
15083 /* Convert a>=0 into (unsigned)a<0x80000000. */
15086 if (mode == DImode || op1 != const0_rtx)
15088 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15089 code = (code == LT ? GEU : LTU);
15093 if (mode == DImode || op1 != constm1_rtx)
15095 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15096 code = (code == LE ? GEU : LTU);
15102 /* Swapping operands may cause constant to appear as first operand. */
15103 if (!nonimmediate_operand (op0, VOIDmode))
15105 if (!can_create_pseudo_p ())
15107 op0 = force_reg (mode, op0);
15109 ix86_compare_op0 = op0;
15110 ix86_compare_op1 = op1;
15111 *pop = ix86_expand_compare (code, NULL, NULL);
15112 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move:
     operands[0] = (operands[1] comparison) ? operands[2] : operands[3].
   Returns 1 when the expansion has been emitted ("DONE") and 0 when the
   caller must fall back to another strategy ("FAIL").  The comparison
   inputs come from the global ix86_compare_op0/ix86_compare_op1.
   NOTE(review): this excerpt elides a number of original lines, so some
   control-flow structure (braces, else-arms) is implied rather than
   visible.  */
15117 ix86_expand_int_movcc (rtx operands[])
15119   enum rtx_code code = GET_CODE (operands[1]), compare_code;
15120   rtx compare_seq, compare_op;
15121   rtx second_test, bypass_test;
15122   enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon after the initializer below --
   harmless, but should be cleaned up.  */
15123   bool sign_bit_compare_p = false;;
15126   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15127   compare_seq = get_insns ();
15130   compare_code = GET_CODE (compare_op);
/* A compare against 0/-1 with GE/LT (resp. GT/LE) only looks at the
   sign bit, which admits cheaper shift-based sequences below.  */
15132   if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15133       || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15134     sign_bit_compare_p = true;
15136   /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15137      HImode insns, we'd be swallowed in word prefix ops.  */
15139   if ((mode != HImode || TARGET_FAST_PREFIX)
15140       && (mode != (TARGET_64BIT ? TImode : DImode))
15141       && CONST_INT_P (operands[2])
15142       && CONST_INT_P (operands[3]))
15144       rtx out = operands[0];
15145       HOST_WIDE_INT ct = INTVAL (operands[2]);
15146       HOST_WIDE_INT cf = INTVAL (operands[3]);
15147       HOST_WIDE_INT diff;
15150       /* Sign bit compares are better done using shifts than we do by using
15152       if (sign_bit_compare_p
15153 	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15154 					     ix86_compare_op1, &compare_op))
15156 	  /* Detect overlap between destination and compare sources.  */
15159 	  if (!sign_bit_compare_p)
15161 	      bool fpcmp = false;
15163 	      compare_code = GET_CODE (compare_op);
15165 	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15166 		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15169 		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
15172 	      /* To simplify rest of code, restrict to the GEU case.  */
15173 	      if (compare_code == LTU)
15175 		  HOST_WIDE_INT tmp = ct;
15178 		  compare_code = reverse_condition (compare_code);
15179 		  code = reverse_condition (code);
/* FP compares must be reversed with the unordered-aware variant.  */
15184 		    PUT_CODE (compare_op,
15185 			      reverse_condition_maybe_unordered
15186 			        (GET_CODE (compare_op)));
15188 		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15192 	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15193 		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
15194 		tmp = gen_reg_rtx (mode);
/* Materialize 0/-1 from the carry flag (sbb-style pattern).  */
15196 	      if (mode == DImode)
15197 		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15199 		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15203 	      if (code == GT || code == GE)
15204 		code = reverse_condition (code);
15207 		  HOST_WIDE_INT tmp = ct;
15212 	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15213 				     ix86_compare_op1, VOIDmode, 0, -1);
15226 	      tmp = expand_simple_binop (mode, PLUS,
15228 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
15239 	      tmp = expand_simple_binop (mode, IOR,
15241 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
15243 	  else if (diff == -1 && ct)
15253 		tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15255 		tmp = expand_simple_binop (mode, PLUS,
15256 					   copy_rtx (tmp), GEN_INT (cf),
15257 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
15265 	       *	 andl cf - ct, dest
15275 		tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
/* General two-constant case: (mask & (cf - ct)) + ct.  */
15278 	      tmp = expand_simple_binop (mode, AND,
15280 					 gen_int_mode (cf - ct, mode),
15281 					 copy_rtx (tmp), 1, OPTAB_DIRECT);
15283 		tmp = expand_simple_binop (mode, PLUS,
15284 					   copy_rtx (tmp), GEN_INT (ct),
15285 					   copy_rtx (tmp), 1, OPTAB_DIRECT);
15288 	  if (!rtx_equal_p (tmp, out))
15289 	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15291 	  return 1; /* DONE */
15296 	  enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15299 	      tmp = ct, ct = cf, cf = tmp;
15302 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
15304 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15306 		  /* We may be reversing unordered compare to normal compare, that
15307 		     is not valid in general (we may convert non-trapping condition
15308 		     to trapping one), however on i386 we currently emit all
15309 		     comparisons unordered.  */
15310 		  compare_code = reverse_condition_maybe_unordered (compare_code);
15311 		  code = reverse_condition_maybe_unordered (code);
15315 		  compare_code = reverse_condition (compare_code);
15316 		  code = reverse_condition (code);
15320 	  compare_code = UNKNOWN;
15321 	  if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15322 	      && CONST_INT_P (ix86_compare_op1))
15324 	      if (ix86_compare_op1 == const0_rtx
15325 		  && (code == LT || code == GE))
15326 		compare_code = code;
15327 	      else if (ix86_compare_op1 == constm1_rtx)
15331 		  else if (code == GT)
15336 	  /* Optimize dest = (op0 < 0) ? -1 : cf.  */
15337 	  if (compare_code != UNKNOWN
15338 	      && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15339 	      && (cf == -1 || ct == -1))
15341 	      /* If lea code below could be used, only optimize
15342 		 if it results in a 2 insn sequence.  */
15344 	      if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15345 		     || diff == 3 || diff == 5 || diff == 9)
15346 		  || (compare_code == LT && ct == -1)
15347 		  || (compare_code == GE && cf == -1))
15350 		   * notl op1	(if necessary)
15358 		      code = reverse_condition (code);
15361 		  out = emit_store_flag (out, code, ix86_compare_op0,
15362 					 ix86_compare_op1, VOIDmode, 0, -1);
15364 		  out = expand_simple_binop (mode, IOR,
15366 					     out, 1, OPTAB_DIRECT);
15367 		  if (out != operands[0])
15368 		    emit_move_insn (operands[0], out);
15370 		  return 1; /* DONE */
/* lea-based sequence: scales 1/2/4/8 (and 3/5/9 via base+index*scale)
   can compute setcc * (ct - cf) + cf in one lea.  */
15375       if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15376 	   || diff == 3 || diff == 5 || diff == 9)
15377 	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15379 	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15385 	   *   lea cf(dest*(ct-cf)),dest
15389 	   * This also catches the degenerate setcc-only case.
15395 	  out = emit_store_flag (out, code, ix86_compare_op0,
15396 				 ix86_compare_op1, VOIDmode, 0, 1);
15399 	  /* On x86_64 the lea instruction operates on Pmode, so we need
15400 	     to get arithmetics done in proper mode to match.  */
15402 	    tmp = copy_rtx (out);
15406 	      out1 = copy_rtx (out);
15407 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15411 		  tmp = gen_rtx_PLUS (mode, tmp, out1);
15417 	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15420 	  if (!rtx_equal_p (tmp, out))
15423 		out = force_operand (tmp, copy_rtx (out));
15425 		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15427 	  if (!rtx_equal_p (out, operands[0]))
15428 	    emit_move_insn (operands[0], copy_rtx (out));
15430 	  return 1; /* DONE */
15434        * General case:			Jumpful:
15435        *   xorl dest,dest		cmpl op1, op2
15436        *   cmpl op1, op2		movl ct, dest
15437        *   setcc dest			jcc 1f
15438        *   decl dest			movl cf, dest
15439        *   andl (cf-ct),dest		1:
15442        * Size 20.			Size 14.
15444        * This is reasonably steep, but branch mispredict costs are
15445        * high on modern cpus, so consider failing only if optimizing
15449       if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15450 	  && BRANCH_COST (optimize_insn_for_speed_p (),
15455 	  enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15460 	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
15462 		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15464 		  /* We may be reversing unordered compare to normal compare,
15465 		     that is not valid in general (we may convert non-trapping
15466 		     condition to trapping one), however on i386 we currently
15467 		     emit all comparisons unordered.  */
15468 		  code = reverse_condition_maybe_unordered (code);
15472 		  code = reverse_condition (code);
15473 		  if (compare_code != UNKNOWN)
15474 		    compare_code = reverse_condition (compare_code);
15478 	  if (compare_code != UNKNOWN)
15480 	      /* notl op1	(if needed)
15485 		 For x < 0 (resp. x <= -1) there will be no notl,
15486 		 so if possible swap the constants to get rid of the
15488 		 True/false will be -1/0 while code below (store flag
15489 		 followed by decrement) is 0/-1, so the constants need
15490 		 to be exchanged once more.  */
15492 	      if (compare_code == GE || !cf)
15494 		  code = reverse_condition (code);
15499 		  HOST_WIDE_INT tmp = cf;
15504 	      out = emit_store_flag (out, code, ix86_compare_op0,
15505 				     ix86_compare_op1, VOIDmode, 0, -1);
15509 	      out = emit_store_flag (out, code, ix86_compare_op0,
15510 				     ix86_compare_op1, VOIDmode, 0, 1);
/* setcc gives 0/1; decrement to get 0/-1 mask, then select.  */
15512 	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15513 					 copy_rtx (out), 1, OPTAB_DIRECT);
15516 	    out = expand_simple_binop (mode, AND, copy_rtx (out),
15517 				       gen_int_mode (cf - ct, mode),
15518 				       copy_rtx (out), 1, OPTAB_DIRECT);
15520 	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15521 				       copy_rtx (out), 1, OPTAB_DIRECT);
15522 	  if (!rtx_equal_p (out, operands[0]))
15523 	    emit_move_insn (operands[0], copy_rtx (out));
15525 	  return 1; /* DONE */
15529   if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15531       /* Try a few things more with specific constants and a variable.  */
15534       rtx var, orig_out, out, tmp;
15536       if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15537 	return 0; /* FAIL */
15539       /* If one of the two operands is an interesting constant, load a
15540 	 constant with the above and mask it in with a logical operation.  */
15542       if (CONST_INT_P (operands[2]))
15545 	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15546 	    operands[3] = constm1_rtx, op = and_optab;
15547 	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15548 	    operands[3] = const0_rtx, op = ior_optab;
15550 	    return 0; /* FAIL */
15552       else if (CONST_INT_P (operands[3]))
15555 	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15556 	    operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the guard below re-tests operands[3] against const0_rtx,
   which is always true when INTVAL (operands[3]) == -1; the parallel
   branch above guards on the *other* operand.  Possibly a typo for
   operands[2] != const0_rtx -- verify against upstream history.  */
15557 	  else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15558 	    operands[2] = const0_rtx, op = ior_optab;
15560 	    return 0; /* FAIL */
15563 	return 0; /* FAIL */
15565       orig_out = operands[0];
15566       tmp = gen_reg_rtx (mode);
15569       /* Recurse to get the constant loaded.  */
15570       if (ix86_expand_int_movcc (operands) == 0)
15571 	return 0; /* FAIL */
15573       /* Mask in the interesting variable.  */
15574       out = expand_binop (mode, op, var, tmp, orig_out, 0,
15576       if (!rtx_equal_p (out, orig_out))
15577 	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15579       return 1; /* DONE */
15583    * For comparison with above,
/* Final fallback: emit a real cmov.  Force both arms into registers
   (or valid nonimmediate operands) as the patterns require.  */
15593   if (! nonimmediate_operand (operands[2], mode))
15594     operands[2] = force_reg (mode, operands[2]);
15595   if (! nonimmediate_operand (operands[3], mode))
15596     operands[3] = force_reg (mode, operands[3]);
/* If a second/bypass test will reuse operands that overlap the
   destination, copy them to scratch registers first.  */
15598   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15600       rtx tmp = gen_reg_rtx (mode);
15601       emit_move_insn (tmp, operands[3]);
15604   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15606       rtx tmp = gen_reg_rtx (mode);
15607       emit_move_insn (tmp, operands[2]);
15611   if (! register_operand (operands[2], VOIDmode)
15613 	  || ! register_operand (operands[3], VOIDmode)))
15614     operands[2] = force_reg (mode, operands[2]);
15617       && ! register_operand (operands[3], VOIDmode))
15618     operands[3] = force_reg (mode, operands[3]);
15620   emit_insn (compare_seq);
15621   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15622 			  gen_rtx_IF_THEN_ELSE (mode,
15623 						compare_op, operands[2],
/* Chain additional cmovs for the bypass/second FP tests, each
   conditionally overwriting the prior result.  */
15626     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15627 			    gen_rtx_IF_THEN_ELSE (mode,
15629 						  copy_rtx (operands[3]),
15630 						  copy_rtx (operands[0]))));
15632     emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15633 			    gen_rtx_IF_THEN_ELSE (mode,
15635 						  copy_rtx (operands[2]),
15636 						  copy_rtx (operands[0]))));
15638   return 1; /* DONE */
15641 /* Swap, force into registers, or otherwise massage the two operands
15642    to an sse comparison with a mask result.  Thus we differ a bit from
15643    ix86_prepare_fp_compare_args which expects to produce a flags result.
15645    The DEST operand exists to help determine whether to commute commutative
15646    operators.  The POP0/POP1 operands are updated in place.  The new
15647    comparison code is returned, or UNKNOWN if not implementable.  */
15649 static enum rtx_code
15650 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15651 				  rtx *pop0, rtx *pop1)
15659       /* We have no LTGT as an operator.  We could implement it with
15660 	 NE & ORDERED, but this requires an extra temporary.  It's
15661 	 not clear that it's worth it.  */
15668       /* These are supported directly.  */
15675       /* For commutative operators, try to canonicalize the destination
15676 	 operand to be first in the comparison - this helps reload to
15677 	 avoid extra moves.  */
15678       if (!dest || !rtx_equal_p (dest, *pop1))
15686       /* These are not supported directly.  Swap the comparison operands
15687 	 to transform into something that is supported.  */
15691       code = swap_condition (code);
/* Any comparison code not handled by a case above is a caller bug.  */
15695       gcc_unreachable ();
15701 /* Detect conditional moves that exactly match min/max operational
15702    semantics.  Note that this is IEEE safe, as long as we don't
15703    interchange the operands.
15705    Returns FALSE if this conditional move doesn't match a MIN/MAX,
15706    and TRUE if the operation is successful and instructions are emitted.  */
15709 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15710 			   rtx cmp_op1, rtx if_true, rtx if_false)
15712   enum machine_mode mode;
/* NOTE(review): for UNGE the arms are swapped to reduce to the
   min/max form -- intervening lines are elided here; verify.  */
15718   else if (code == UNGE)
15721       if_true = if_false;
/* The cmov matches min/max only when the selected arms are exactly
   the comparison operands (in either order).  */
15727   if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15729   else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15734   mode = GET_MODE (dest);
15736   /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15737      but MODE may be a vector mode and thus not appropriate.  */
15738   if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-safe path: wrap the operands in an UNSPEC so the operand order
   (and hence NaN/signed-zero behavior) cannot be changed later.  */
15740       int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15743       if_true = force_reg (mode, if_true);
15744       v = gen_rtvec (2, if_true, if_false);
15745       tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX rtx is acceptable.  */
15749       code = is_min ? SMIN : SMAX;
15750       tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15753   emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15757 /* Expand an sse vector comparison.  Return the register with the result.  */
15760 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15761 		     rtx op_true, rtx op_false)
15763   enum machine_mode mode = GET_MODE (dest);
/* First operand must be a register; second may be a memory operand.  */
15766     cmp_op0 = force_reg (mode, cmp_op0);
15767   if (!nonimmediate_operand (cmp_op1, mode))
15768     cmp_op1 = force_reg (mode, cmp_op1);
/* Use a fresh register when DEST overlaps either select arm, so the
   mask does not clobber data still needed by the caller.  */
15771       || reg_overlap_mentioned_p (dest, op_true)
15772       || reg_overlap_mentioned_p (dest, op_false))
15773     dest = gen_reg_rtx (mode);
15775   x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15776   emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15781 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15782    operations.  This is used for both scalar and vector conditional moves.  */
15785 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15787   enum machine_mode mode = GET_MODE (dest);
/* False arm is zero: DEST = CMP & OP_TRUE suffices.  */
15790   if (op_false == CONST0_RTX (mode))
15792       op_true = force_reg (mode, op_true);
15793       x = gen_rtx_AND (mode, cmp, op_true);
15794       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* True arm is zero: DEST = ~CMP & OP_FALSE.  */
15796   else if (op_true == CONST0_RTX (mode))
15798       op_false = force_reg (mode, op_false);
15799       x = gen_rtx_NOT (mode, cmp);
15800       x = gen_rtx_AND (mode, x, op_false);
15801       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a native conditional-move (pcmov) pattern.  */
15803   else if (TARGET_SSE5)
15805       rtx pcmov = gen_rtx_SET (mode, dest,
15806 			       gen_rtx_IF_THEN_ELSE (mode, cmp,
/* Generic form: DEST = (OP_TRUE & CMP) | (OP_FALSE & ~CMP), built in
   two temporaries then OR-ed together.  */
15813       op_true = force_reg (mode, op_true);
15814       op_false = force_reg (mode, op_false);
15816       t2 = gen_reg_rtx (mode);
15818 	t3 = gen_reg_rtx (mode);
15822       x = gen_rtx_AND (mode, op_true, cmp);
15823       emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15825       x = gen_rtx_NOT (mode, cmp);
15826       x = gen_rtx_AND (mode, x, op_false);
15827       emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15829       x = gen_rtx_IOR (mode, t3, t2);
15830       emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15834 /* Expand a floating-point conditional move.  Return true if successful.  */
15837 ix86_expand_fp_movcc (rtx operands[])
15839   enum machine_mode mode = GET_MODE (operands[0]);
15840   enum rtx_code code = GET_CODE (operands[1]);
15841   rtx tmp, compare_op, second_test, bypass_test;
15843   if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15845       enum machine_mode cmode;
15847       /* Since we've no cmove for sse registers, don't force bad register
15848 	 allocation just to gain access to it.  Deny movcc when the
15849 	 comparison mode doesn't match the move mode.  */
/* cmode falls back to op1's mode when op0's mode is VOIDmode
   (e.g. a constant first operand).  */
15850       cmode = GET_MODE (ix86_compare_op0);
15851       if (cmode == VOIDmode)
15852 	cmode = GET_MODE (ix86_compare_op1);
15856       code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15858 					       &ix86_compare_op1);
15859       if (code == UNKNOWN)
/* Try the min/max shortcut first; else compare + mask-select.  */
15862       if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15863 				     ix86_compare_op1, operands[2],
15867       tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15868 				 ix86_compare_op1, operands[2], operands[3]);
15869       ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15873   /* The floating point conditional move instructions don't directly
15874      support conditions resulting from a signed integer comparison.  */
15876   compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15878   /* The floating point conditional move instructions don't directly
15879      support signed integer comparisons.  */
15881   if (!fcmov_comparison_operator (compare_op, VOIDmode))
15883       gcc_assert (!second_test && !bypass_test);
/* Materialize the condition into a QImode flag with setcc, then
   re-compare that flag against zero, which fcmov can handle.  */
15884       tmp = gen_reg_rtx (QImode);
15885       ix86_expand_setcc (code, tmp);
15887       ix86_compare_op0 = tmp;
15888       ix86_compare_op1 = const0_rtx;
15889       compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
/* Copy arms that overlap the destination before the chained cmovs.  */
15891   if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15893       tmp = gen_reg_rtx (mode);
15894       emit_move_insn (tmp, operands[3]);
15897   if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15899       tmp = gen_reg_rtx (mode);
15900       emit_move_insn (tmp, operands[2]);
15904   emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15905 			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
15906 						operands[2], operands[3])));
15908     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15909 			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15910 						  operands[3], operands[0])));
15912     emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15913 			    gen_rtx_IF_THEN_ELSE (mode, second_test,
15914 						  operands[2], operands[0])));
15919 /* Expand a floating-point vector conditional move; a vcond operation
15920    rather than a movcc operation.  */
15923 ix86_expand_fp_vcond (rtx operands[])
15925   enum rtx_code code = GET_CODE (operands[3]);
15928   code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15929 					   &operands[4], &operands[5]);
15930   if (code == UNKNOWN)
/* Try the min/max shortcut first; otherwise compare into a mask and
   blend the two arms with logical ops.  */
15933   if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15934 				 operands[5], operands[1], operands[2]))
15937   cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15938 			     operands[1], operands[2]);
15939   ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15943 /* Expand a signed/unsigned integral vector conditional move.  */
15946 ix86_expand_int_vcond (rtx operands[])
15948   enum machine_mode mode = GET_MODE (operands[0]);
15949   enum rtx_code code = GET_CODE (operands[3]);
15950   bool negate = false;
15953   cop0 = operands[4];
15954   cop1 = operands[5];
15956   /* SSE5 supports all of the comparisons on all vector int types.  */
15959   /* Canonicalize the comparison to EQ, GT, GTU.  */
15970 	  code = reverse_condition (code);
15976 	  code = reverse_condition (code);
/* GE-style codes: swap operands to reduce to GT/GTU.  */
15982 	  code = swap_condition (code);
15983 	  x = cop0, cop0 = cop1, cop1 = x;
15987 	  gcc_unreachable ();
15990   /* Only SSE4.1/SSE4.2 supports V2DImode.  */
15991   if (mode == V2DImode)
15996 	  /* SSE4.1 supports EQ.  */
15997 	  if (!TARGET_SSE4_1)
16003 	  /* SSE4.2 supports GT/GTU.  */
16004 	  if (!TARGET_SSE4_2)
16009 	  gcc_unreachable ();
16013   /* Unsigned parallel compare is not supported by the hardware.  Play some
16014      tricks to turn this into a signed comparison against 0.  */
16017       cop0 = force_reg (mode, cop0);
16026 	  /* Perform a parallel modulo subtraction.  */
16027 	  t1 = gen_reg_rtx (mode);
16028 	  emit_insn ((mode == V4SImode
16030 		      : gen_subv2di3) (t1, cop0, cop1));
16032 	  /* Extract the original sign bit of op0.  */
16033 	  mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16035 	  t2 = gen_reg_rtx (mode);
16036 	  emit_insn ((mode == V4SImode
16038 		      : gen_andv2di3) (t2, cop0, mask));
16040 	  /* XOR it back into the result of the subtraction.  This results
16041 	     in the sign bit set iff we saw unsigned underflow.  */
16042 	  x = gen_reg_rtx (mode);
16043 	  emit_insn ((mode == V4SImode
16045 		      : gen_xorv2di3) (x, t1, t2));
/* Narrower elements: unsigned saturating subtract leaves nonzero
   exactly where cop0 >u cop1 (minus boundary handling elided here).  */
16053 	  /* Perform a parallel unsigned saturating subtraction.  */
16054 	  x = gen_reg_rtx (mode);
16055 	  emit_insn (gen_rtx_SET (VOIDmode, x,
16056 				  gen_rtx_US_MINUS (mode, cop0, cop1)));
16063 	  gcc_unreachable ();
/* After the tricks above the comparison is against all-zeros.  */
16067       cop1 = CONST0_RTX (mode);
/* NEGATE flips the select arms when the condition was inverted during
   canonicalization.  */
16071   x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16072 			   operands[1+negate], operands[2-negate]);
16074   ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16075 			 operands[2-negate]);
16079 /* Unpack OP[1] into the next wider integer vector type.  UNSIGNED_P is
16080    true if we should do zero extension, else sign extension.  HIGH_P is
16081    true if we want the N/2 high elements, else the low elements.  */
16084 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16086   enum machine_mode imode = GET_MODE (operands[1]);
16087   rtx (*unpack)(rtx, rtx, rtx);
/* Select the interleave (punpckh/punpckl) generator per element width
   and requested half.  */
16094 	unpack = gen_vec_interleave_highv16qi;
16096 	unpack = gen_vec_interleave_lowv16qi;
16100 	unpack = gen_vec_interleave_highv8hi;
16102 	unpack = gen_vec_interleave_lowv8hi;
16106 	unpack = gen_vec_interleave_highv4si;
16108 	unpack = gen_vec_interleave_lowv4si;
16111       gcc_unreachable ();
16114   dest = gen_lowpart (imode, operands[0]);
/* Zero extension interleaves with a zero vector; sign extension
   interleaves with a sign mask computed as (0 > x) per element.  */
16117     se = force_reg (imode, CONST0_RTX (imode));
16119     se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16120 			      operands[1], pc_rtx, pc_rtx);
16122   emit_insn (unpack (dest, operands[1], se));
16125 /* This function performs the same task as ix86_expand_sse_unpack,
16126    but with SSE4.1 instructions.  */
16129 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16131   enum machine_mode imode = GET_MODE (operands[1]);
16132   rtx (*unpack)(rtx, rtx);
/* Select the pmovzx/pmovsx generator for the source element width.  */
16139 	unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16141 	unpack = gen_sse4_1_extendv8qiv8hi2;
16145 	unpack = gen_sse4_1_zero_extendv4hiv4si2;
16147 	unpack = gen_sse4_1_extendv4hiv4si2;
16151 	unpack = gen_sse4_1_zero_extendv2siv2di2;
16153 	unpack = gen_sse4_1_extendv2siv2di2;
16156       gcc_unreachable ();
16159   dest = operands[0];
/* pmov*x reads the low half; for the high half, shift it down first.  */
16162       /* Shift higher 8 bytes to lower 8 bytes.  */
16163       src = gen_reg_rtx (imode);
16164       emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16165 				   gen_lowpart (TImode, operands[1]),
16171   emit_insn (unpack (dest, src));
16174 /* This function performs the same task as ix86_expand_sse_unpack,
16175    but with sse5 instructions.  */
16178 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16180   enum machine_mode imode = GET_MODE (operands[1]);
16181   int pperm_bytes[16];
/* H selects the high (8) or low (0) half of the source bytes.  */
16183   int h = (high_p) ? 8 : 0;
16186   rtvec v = rtvec_alloc (16);
16189   rtx op0 = operands[0], op1 = operands[1];
/* V8QI -> V8HI: each source byte is followed by a zero or sign byte
   in the PPERM selector vector.  */
16194       vs = rtvec_alloc (8);
16195       h2 = (high_p) ? 8 : 0;
16196       for (i = 0; i < 8; i++)
16198 	  pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16199 	  pperm_bytes[2*i+1] = ((unsigned_p)
16201 				: PPERM_SIGN | PPERM_SRC2 | i | h);
16204       for (i = 0; i < 16; i++)
16205 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16207       for (i = 0; i < 8; i++)
16208 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16210       p = gen_rtx_PARALLEL (VOIDmode, vs);
16211       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16213 	emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16215 	emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V4HI -> V4SI: two source bytes followed by two extension bytes.  */
16219       vs = rtvec_alloc (4);
16220       h2 = (high_p) ? 4 : 0;
16221       for (i = 0; i < 4; i++)
16223 	  sign_extend = ((unsigned_p)
16225 			 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16226 	  pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16227 	  pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16228 	  pperm_bytes[4*i+2] = sign_extend;
16229 	  pperm_bytes[4*i+3] = sign_extend;
16232       for (i = 0; i < 16; i++)
16233 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16235       for (i = 0; i < 4; i++)
16236 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16238       p = gen_rtx_PARALLEL (VOIDmode, vs);
16239       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16241 	emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16243 	emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V2SI -> V2DI: four source bytes followed by four extension bytes.  */
16247       vs = rtvec_alloc (2);
16248       h2 = (high_p) ? 2 : 0;
16249       for (i = 0; i < 2; i++)
16251 	  sign_extend = ((unsigned_p)
16253 			 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16254 	  pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16255 	  pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16256 	  pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16257 	  pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16258 	  pperm_bytes[8*i+4] = sign_extend;
16259 	  pperm_bytes[8*i+5] = sign_extend;
16260 	  pperm_bytes[8*i+6] = sign_extend;
16261 	  pperm_bytes[8*i+7] = sign_extend;
16264       for (i = 0; i < 16; i++)
16265 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16267       for (i = 0; i < 2; i++)
16268 	RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16270       p = gen_rtx_PARALLEL (VOIDmode, vs);
16271       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16273 	emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16275 	emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16279       gcc_unreachable ();
16285 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16286    next narrower integer vector type */
16288 ix86_expand_sse5_pack (rtx operands[3])
16290   enum machine_mode imode = GET_MODE (operands[0]);
16291   int pperm_bytes[16];
16293   rtvec v = rtvec_alloc (16);
16295   rtx op0 = operands[0];
16296   rtx op1 = operands[1];
16297   rtx op2 = operands[2];
/* V16QI result: take every other byte (even offsets) from each source;
   SRC1 bytes fill the low 8 lanes, SRC2 bytes the high 8.  */
16302       for (i = 0; i < 8; i++)
16304 	  pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16305 	  pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16308       for (i = 0; i < 16; i++)
16309 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16311       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16312       emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V8HI result: keep the low two bytes of each 32-bit source element.  */
16316       for (i = 0; i < 4; i++)
16318 	  pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16319 	  pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16320 	  pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16321 	  pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16324       for (i = 0; i < 16; i++)
16325 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16327       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16328       emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V4SI result: keep the low four bytes of each 64-bit source element.  */
16332       for (i = 0; i < 2; i++)
16334 	  pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16335 	  pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16336 	  pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16337 	  pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16338 	  pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16339 	  pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16340 	  pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16341 	  pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16344       for (i = 0; i < 16; i++)
16345 	RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16347       x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16348       emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16352       gcc_unreachable ();
16358 /* Expand conditional increment or decrement using adb/sbb instructions.
16359    The default case using setcc followed by the conditional move can be
16360    done by generic code.  */
16362 ix86_expand_int_addcc (rtx operands[])
16364   enum rtx_code code = GET_CODE (operands[1]);
16366   rtx val = const0_rtx;
16367   bool fpcmp = false;
16368   enum machine_mode mode = GET_MODE (operands[0]);
/* Only +/-1 addends can be folded into the carry of adc/sbb.  */
16370   if (operands[3] != const1_rtx
16371       && operands[3] != constm1_rtx)
/* The comparison must reduce to a carry-flag test (LTU/GEU).  */
16373   if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16374 				       ix86_compare_op1, &compare_op))
16376   code = GET_CODE (compare_op);
16378   if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16379       || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16382       code = ix86_fp_compare_code_to_integer (code);
/* FP conditions need the unordered-aware reversal.  */
16389 	PUT_CODE (compare_op,
16390 		  reverse_condition_maybe_unordered
16391 		    (GET_CODE (compare_op)));
16393 	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16395   PUT_MODE (compare_op, mode);
16397   /* Construct either adc or sbb insn.  */
16398   if ((code == LTU) == (operands[3] == constm1_rtx))
16400       switch (GET_MODE (operands[0]))
16403 	  emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16406 	  emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16409 	  emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16412 	  emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16415 	  gcc_unreachable ();
16420       switch (GET_MODE (operands[0]))
16423 	  emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16426 	  emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16429 	  emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16432 	  emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16435 	  gcc_unreachable ();
16438   return 1; /* DONE */
16442 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
16443    works for floating pointer parameters and nonoffsetable memories.
16444    For pushes, it returns just stack offsets; the values will be saved
16445    in the right order.  Maximally three parts are generated.  */
16448 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 32-bit target splits into SImode words (XFmode is 3);
   64-bit target splits into DImode words.  */
16453     size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16455     size = (GET_MODE_SIZE (mode) + 4) / 8;
16457   gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16458   gcc_assert (size >= 2 && size <= 4);
16460   /* Optimize constant pool reference to immediates.  This is used by fp
16461      moves, that force all constants to memory to allow combining.  */
16462   if (MEM_P (operand) && MEM_READONLY_P (operand))
16464       rtx tmp = maybe_get_pool_constant (operand);
16469   if (MEM_P (operand) && !offsettable_memref_p (operand))
16471       /* The only non-offsetable memories we handle are pushes.  */
16472       int ok = push_operand (operand, VOIDmode);
/* For pushes, every part is the same (pre-dec) stack reference.  */
16476       operand = copy_rtx (operand);
16477       PUT_MODE (operand, Pmode);
16478       parts[0] = parts[1] = parts[2] = parts[3] = operand;
16482   if (GET_CODE (operand) == CONST_VECTOR)
16484       enum machine_mode imode = int_mode_for_mode (mode);
16485       /* Caution: if we looked through a constant pool memory above,
16486 	 the operand may actually have a different mode now.  That's
16487 	 ok, since we want to pun this all the way back to an integer.  */
16488       operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16489       gcc_assert (operand != NULL);
/* 32-bit target: split into SImode pieces.  */
16495       if (mode == DImode)
16496 	split_di (&operand, 1, &parts[0], &parts[1]);
16501 	  if (REG_P (operand))
16503 	      gcc_assert (reload_completed);
16504 	      for (i = 0; i < size; i++)
16505 		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16507 	  else if (offsettable_memref_p (operand))
16509 	      operand = adjust_address (operand, SImode, 0);
16510 	      parts[0] = operand;
16511 	      for (i = 1; i < size; i++)
16512 		parts[i] = adjust_address (operand, SImode, 4 * i);
16514 	  else if (GET_CODE (operand) == CONST_DOUBLE)
16519 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Decompose the FP constant into target-format 32-bit words.  */
16523 		  real_to_target (l, &r, mode);
16524 		  parts[3] = gen_int_mode (l[3], SImode);
16525 		  parts[2] = gen_int_mode (l[2], SImode);
16528 		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16529 		  parts[2] = gen_int_mode (l[2], SImode);
16532 		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16535 		  gcc_unreachable ();
16537 	      parts[1] = gen_int_mode (l[1], SImode);
16538 	      parts[0] = gen_int_mode (l[0], SImode);
16541 	    gcc_unreachable ();
/* 64-bit target: split into DImode (plus SImode upper for XFmode).  */
16546       if (mode == TImode)
16547 	split_ti (&operand, 1, &parts[0], &parts[1]);
16548       if (mode == XFmode || mode == TFmode)
16550 	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16551 	  if (REG_P (operand))
16553 	      gcc_assert (reload_completed);
16554 	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16555 	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16557 	  else if (offsettable_memref_p (operand))
16559 	      operand = adjust_address (operand, DImode, 0);
16560 	      parts[0] = operand;
16561 	      parts[1] = adjust_address (operand, upper_mode, 8);
16563 	  else if (GET_CODE (operand) == CONST_DOUBLE)
16568 	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16569 	      real_to_target (l, &r, mode);
16571 	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
16572 	      if (HOST_BITS_PER_WIDE_INT >= 64)
16575 		    ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16576 		     + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16579 		parts[0] = immed_double_const (l[0], l[1], DImode);
16581 	      if (upper_mode == SImode)
16582 	        parts[1] = gen_int_mode (l[2], SImode);
16583 	      else if (HOST_BITS_PER_WIDE_INT >= 64)
16586 		    ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16587 		     + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16590 		parts[1] = immed_double_const (l[2], l[3], DImode);
16593 	    gcc_unreachable ();
16600 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16601 Return false when normal moves are needed; true when all required
16602 insns have been emitted. Operands 2-4 contain the input values
16603 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): this listing is elided -- interior lines (braces, else arms,
   declarations of i, j, nparts, push, base, tmp, part[][]) are missing. */
16606 ix86_split_long_move (rtx operands[])
16611 int collisions = 0;
16612 enum machine_mode mode = GET_MODE (operands[0]);
16613 bool collisionparts[4];
16615 /* The DFmode expanders may ask us to move double.
16616 For 64bit target this is single move. By hiding the fact
16617 here we simplify i386.md splitters. */
16618 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16620 /* Optimize constant pool reference to immediates. This is used by
16621 fp moves, that force all constants to memory to allow combining. */
16623 if (MEM_P (operands[1])
16624 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16625 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16626 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16627 if (push_operand (operands[0], VOIDmode))
16629 operands[0] = copy_rtx (operands[0]);
16630 PUT_MODE (operands[0], Pmode);
16633 operands[0] = gen_lowpart (DImode, operands[0]);
16634 operands[1] = gen_lowpart (DImode, operands[1]);
16635 emit_move_insn (operands[0], operands[1]);
16639 /* The only non-offsettable memory we handle is push. */
16640 if (push_operand (operands[0], VOIDmode))
16643 gcc_assert (!MEM_P (operands[0])
16644 || offsettable_memref_p (operands[0]));
/* Split both source and destination into word-sized parts.  */
16646 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16647 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16649 /* When emitting push, take care for source operands on the stack. */
16650 if (push && MEM_P (operands[1])
16651 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16652 for (i = 0; i < nparts - 1; i++)
16653 part[1][i] = change_address (part[1][i],
16654 GET_MODE (part[1][i]),
16655 XEXP (part[1][i + 1], 0));
16657 /* We need to do copy in the right order in case an address register
16658 of the source overlaps the destination. */
16659 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16663 for (i = 0; i < nparts; i++)
16666 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0))
16667 if (collisionparts[i])
16671 /* Collision in the middle part can be handled by reordering. */
16672 if (collisions == 1 && nparts == 3 && collisionparts [1])
16674 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16675 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16677 else if (collisions == 1
16679 && (collisionparts [1] || collisionparts [2]))
16681 if (collisionparts [1])
16683 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16684 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16688 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16689 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16693 /* If there are more collisions, we can't handle it by reordering.
16694 Do an lea to the last part and use only one colliding move. */
16695 else if (collisions > 1)
16701 base = part[0][nparts - 1];
16703 /* Handle the case when the last part isn't valid for lea.
16704 Happens in 64-bit mode storing the 12-byte XFmode. */
16705 if (GET_MODE (base) != Pmode)
16706 base = gen_rtx_REG (Pmode, REGNO (base));
16708 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16709 part[1][0] = replace_equiv_address (part[1][0], base);
16710 for (i = 1; i < nparts; i++)
16712 tmp = plus_constant (base, UNITS_PER_WORD * i);
16713 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Pushes: XFmode occupies 12 bytes, so pre-adjust the stack pointer by -4
   to keep the total push 16-byte sized when the ABI pads long double.  */
16724 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16725 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
16726 emit_move_insn (part[0][2], part[1][2]);
16728 else if (nparts == 4)
16730 emit_move_insn (part[0][3], part[1][3]);
16731 emit_move_insn (part[0][2], part[1][2]);
16736 /* In 64bit mode we don't have 32bit push available. In case this is
16737 register, it is OK - we will just use larger counterpart. We also
16738 retype memory - these come from an attempt to avoid REX prefix on
16739 moving of second half of TFmode value. */
16740 if (GET_MODE (part[1][1]) == SImode)
16742 switch (GET_CODE (part[1][1]))
16745 part[1][1] = adjust_address (part[1][1], DImode, 0);
16749 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16753 gcc_unreachable ();
16756 if (GET_MODE (part[1][0]) == SImode)
16757 part[1][0] = part[1][1];
16760 emit_move_insn (part[0][1], part[1][1]);
16761 emit_move_insn (part[0][0], part[1][0]);
16765 /* Choose correct order to not overwrite the source before it is copied. */
16766 if ((REG_P (part[0][0])
16767 && REG_P (part[1][1])
16768 && (REGNO (part[0][0]) == REGNO (part[1][1])
16770 && REGNO (part[0][0]) == REGNO (part[1][2]))
16772 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16774 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy in reverse order (high part first) to avoid clobbering.  */
16776 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16778 operands[2 + i] = part[0][j];
16779 operands[6 + i] = part[1][j];
16784 for (i = 0; i < nparts; i++)
16786 operands[2 + i] = part[0][i];
16787 operands[6 + i] = part[1][i];
16791 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16792 if (optimize_insn_for_size_p ())
16794 for (j = 0; j < nparts - 1; j++)
16795 if (CONST_INT_P (operands[6 + j])
16796 && operands[6 + j] != const0_rtx
16797 && REG_P (operands[2 + j]))
16798 for (i = j; i < nparts - 1; i++)
16799 if (CONST_INT_P (operands[7 + i])
16800 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16801 operands[7 + i] = operands[2 + j];
/* Finally emit the part-wise moves in the order chosen above.  */
16804 for (i = 0; i < nparts; i++)
16805 emit_move_insn (operands[2 + i], operands[6 + i]);
16810 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16811 left shift by a constant, either using a single shift or
16812 a sequence of add instructions. */
/* NOTE(review): the second arm of each conditional gen_* pair is elided in
   this listing; presumably the SImode generator (gen_addsi3/gen_ashlsi3). */
16815 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16819 emit_insn ((mode == DImode
16821 : gen_adddi3) (operand, operand, operand));
/* Use COUNT self-adds when they are cheaper than one constant shift,
   per the active cost table -- but never when optimizing for size.  */
16823 else if (!optimize_insn_for_size_p ()
16824 && count * ix86_cost->add <= ix86_cost->shift_const)
16827 for (i=0; i<count; i++)
16829 emit_insn ((mode == DImode
16831 : gen_adddi3) (operand, operand, operand));
16835 emit_insn ((mode == DImode
16837 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into word-sized operations.  SCRATCH, when non-NULL and cmov is
   available, avoids a conditional branch in the variable-count path.  */
16841 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16843 rtx low[2], high[2];
16845 const int single_width = mode == DImode ? 32 : 64;
16847 if (CONST_INT_P (operands[2]))
16849 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16850 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16852 if (count >= single_width)
/* Shifting by a full word or more: low word becomes zero.  */
16854 emit_move_insn (high[0], low[1]);
16855 emit_move_insn (low[0], const0_rtx);
16857 if (count > single_width)
16858 ix86_expand_ashl_const (high[0], count - single_width, mode);
16862 if (!rtx_equal_p (operands[0], operands[1]))
16863 emit_move_insn (operands[0], operands[1]);
16864 emit_insn ((mode == DImode
16866 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16867 ix86_expand_ashl_const (low[0], count, mode);
16872 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16874 if (operands[1] == const1_rtx)
16876 /* Assuming we've chosen QImode capable registers, then 1 << N
16877 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16878 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16880 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16882 ix86_expand_clear (low[0]);
16883 ix86_expand_clear (high[0]);
16884 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* Set the low byte of LOW to (count < single_width) via sete/setne.  */
16886 d = gen_lowpart (QImode, low[0]);
16887 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16888 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16889 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16891 d = gen_lowpart (QImode, high[0]);
16892 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16893 s = gen_rtx_NE (QImode, flags, const0_rtx);
16894 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16897 /* Otherwise, we can get the same results by manually performing
16898 a bit extract operation on bit 5/6, and then performing the two
16899 shifts. The two methods of getting 0/1 into low/high are exactly
16900 the same size. Avoiding the shift in the bit extract case helps
16901 pentium4 a bit; no one else seems to care much either way. */
16906 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16907 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16909 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16910 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* Extract bit 5 (DImode) or 6 (TImode) of the count into HIGH.  */
16912 emit_insn ((mode == DImode
16914 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16915 emit_insn ((mode == DImode
16917 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16918 emit_move_insn (low[0], high[0]);
16919 emit_insn ((mode == DImode
16921 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16924 emit_insn ((mode == DImode
16926 : gen_ashldi3) (low[0], low[0], operands[2]));
16927 emit_insn ((mode == DImode
16929 : gen_ashldi3) (high[0], high[0], operands[2]));
16933 if (operands[1] == constm1_rtx)
16935 /* For -1 << N, we can avoid the shld instruction, because we
16936 know that we're shifting 0...31/63 ones into a -1. */
16937 emit_move_insn (low[0], constm1_rtx)
16938 if (optimize_insn_for_size_p ())
16939 emit_move_insn (high[0], low[0]);
16941 emit_move_insn (high[0], constm1_rtx);
16945 if (!rtx_equal_p (operands[0], operands[1]))
16946 emit_move_insn (operands[0], operands[1]);
16948 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16949 emit_insn ((mode == DImode
16951 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16954 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
/* Fix up for counts >= single_width: cmov when possible, branch otherwise.  */
16956 if (TARGET_CMOVE && scratch)
16958 ix86_expand_clear (scratch);
16959 emit_insn ((mode == DImode
16960 ? gen_x86_shift_adj_1
16961 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16965 emit_insn ((mode == DImode
16966 ? gen_x86_shift_adj_2
16967 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift (DImode on 32-bit, TImode
   on 64-bit) into word-sized operations.  See ix86_split_ashl.  */
16971 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16973 rtx low[2], high[2];
16975 const int single_width = mode == DImode ? 32 : 64;
16977 if (CONST_INT_P (operands[2]))
16979 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16980 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by all-bits-minus-one: both words become the sign mask.  */
16982 if (count == single_width * 2 - 1)
16984 emit_move_insn (high[0], high[1]);
16985 emit_insn ((mode == DImode
16987 : gen_ashrdi3) (high[0], high[0],
16988 GEN_INT (single_width - 1)));
16989 emit_move_insn (low[0], high[0]);
16992 else if (count >= single_width)
16994 emit_move_insn (low[0], high[1]);
16995 emit_move_insn (high[0], low[0]);
/* High word is replicated sign bits.  */
16996 emit_insn ((mode == DImode
16998 : gen_ashrdi3) (high[0], high[0],
16999 GEN_INT (single_width - 1)));
17000 if (count > single_width)
17001 emit_insn ((mode == DImode
17003 : gen_ashrdi3) (low[0], low[0],
17004 GEN_INT (count - single_width)));
17008 if (!rtx_equal_p (operands[0], operands[1]))
17009 emit_move_insn (operands[0], operands[1]);
17010 emit_insn ((mode == DImode
17012 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17013 emit_insn ((mode == DImode
17015 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17020 if (!rtx_equal_p (operands[0], operands[1]))
17021 emit_move_insn (operands[0], operands[1]);
17023 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17025 emit_insn ((mode == DImode
17027 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17028 emit_insn ((mode == DImode
17030 : gen_ashrdi3) (high[0], high[0], operands[2]));
/* Fix up for counts >= single_width; SCRATCH holds the sign extension.  */
17032 if (TARGET_CMOVE && scratch)
17034 emit_move_insn (scratch, high[0]);
17035 emit_insn ((mode == DImode
17037 : gen_ashrdi3) (scratch, scratch,
17038 GEN_INT (single_width - 1)));
17039 emit_insn ((mode == DImode
17040 ? gen_x86_shift_adj_1
17041 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17045 emit_insn ((mode == DImode
17046 ? gen_x86_shift_adj_3
17047 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift (DImode on 32-bit, TImode on
   64-bit) into word-sized operations.  See ix86_split_ashl.  */
17052 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17054 rtx low[2], high[2];
17056 const int single_width = mode == DImode ? 32 : 64;
17058 if (CONST_INT_P (operands[2]))
17060 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17061 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17063 if (count >= single_width)
/* Shifting by a full word or more: high word becomes zero.  */
17065 emit_move_insn (low[0], high[1]);
17066 ix86_expand_clear (high[0]);
17068 if (count > single_width)
17069 emit_insn ((mode == DImode
17071 : gen_lshrdi3) (low[0], low[0],
17072 GEN_INT (count - single_width)));
17076 if (!rtx_equal_p (operands[0], operands[1]))
17077 emit_move_insn (operands[0], operands[1]);
17078 emit_insn ((mode == DImode
17080 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17081 emit_insn ((mode == DImode
17083 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17088 if (!rtx_equal_p (operands[0], operands[1]))
17089 emit_move_insn (operands[0], operands[1]);
17091 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17093 emit_insn ((mode == DImode
17095 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17096 emit_insn ((mode == DImode
17098 : gen_lshrdi3) (high[0], high[0], operands[2]));
17100 /* Heh. By reversing the arguments, we can reuse this pattern. */
17101 if (TARGET_CMOVE && scratch)
17103 ix86_expand_clear (scratch);
17104 emit_insn ((mode == DImode
17105 ? gen_x86_shift_adj_1
17106 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17110 emit_insn ((mode == DImode
17111 ? gen_x86_shift_adj_2
17112 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17116 /* Predict just emitted jump instruction to be taken with probability PROB.
   PROB is scaled by REG_BR_PROB_BASE, as callers below demonstrate.  */
17118 predict_jump (int prob)
17120 rtx insn = get_last_insn ();
/* The last emitted insn must be a jump; attach the branch-probability note.  */
17121 gcc_assert (JUMP_P (insn));
17122 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17125 /* Helper function for the string operations below. Test VARIABLE whether
17126 it is aligned to VALUE bytes. If true, jump to the label. */
/* Returns the label; EPILOGUE selects the predicted branch probability.  */
17128 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17130 rtx label = gen_label_rtx ();
17131 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to LABEL if the tested bits are clear.  */
17132 if (GET_MODE (variable) == DImode)
17133 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17135 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17136 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17139 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17141 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17145 /* Adjust COUNTER by the VALUE.
   Emits countreg -= value in the counter's own mode.  */
17147 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17149 if (GET_MODE (countreg) == DImode)
17150 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)))
17152 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17155 /* Zero extend possibly SImode EXP to Pmode register.
   Always returns a fresh Pmode pseudo holding EXP's value.  */
17157 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode means EXP is a constant; just force it into a register.  */
17160 if (GET_MODE (exp) == VOIDmode)
17161 return force_reg (Pmode, exp);
17162 if (GET_MODE (exp) == Pmode)
17163 return copy_to_mode_reg (Pmode, exp);
/* Otherwise EXP is SImode on a 64-bit target; zero-extend to DImode.  */
17164 r = gen_reg_rtx (Pmode);
17165 emit_insn (gen_zero_extendsidi2 (r, exp));
17169 /* Divide COUNTREG by SCALE.
   SCALE must be a power of two; a register count is divided with a
   logical right shift by log2(SCALE).  */
17171 scale_counter (rtx countreg, int scale)
17174 rtx piece_size_mask;
/* Constant counts are folded at compile time.  */
17178 if (CONST_INT_P (countreg))
17179 return GEN_INT (INTVAL (countreg) / scale);
17180 gcc_assert (REG_P (countreg));
17182 piece_size_mask = GEN_INT (scale - 1);
17183 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17184 GEN_INT (exact_log2 (scale)),
17185 NULL, 1, OPTAB_DIRECT);
17189 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17190 DImode for constant loop counts. */
17192 static enum machine_mode
17193 counter_mode (rtx count_exp)
17195 if (GET_MODE (count_exp) != VOIDmode)
17196 return GET_MODE (count_exp);
17197 if (GET_CODE (count_exp) != CONST_INT)
/* Constant fits in 32 bits?  Use SImode unless the value needs DImode.  */
17199 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17204 /* When SRCPTR is non-NULL, output simple loop to move memory
17205 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17206 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17207 equivalent loop to set memory by VALUE (supposed to be in MODE).
17209 The size is rounded down to whole number of chunk size moved at once.
17210 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
17214 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17215 rtx destptr, rtx srcptr, rtx value,
17216 rtx count, enum machine_mode mode, int unroll,
17219 rtx out_label, top_label, iter, tmp;
17220 enum machine_mode iter_mode = counter_mode (count);
17221 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17222 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17228 top_label = gen_label_rtx ();
17229 out_label = gen_label_rtx ();
17230 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled piece size.  */
17232 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17233 NULL, 1, OPTAB_DIRECT);
17234 /* Those two should combine. */
17235 if (piece_size == const1_rtx)
17237 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17239 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17241 emit_move_insn (iter, const0_rtx);
17243 emit_label (top_label);
/* Rebase DESTMEM (and SRCMEM) at ptr + iter for this iteration.  */
17245 tmp = convert_modes (Pmode, iter_mode, iter, true);
17246 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17247 destmem = change_address (destmem, mode, x_addr);
17251 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17252 srcmem = change_address (srcmem, mode, y_addr);
17254 /* When unrolling for chips that reorder memory reads and writes,
17255 we can save registers by using single temporary.
17256 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" below disables this branch unconditionally;
   it appears to be deliberately dead code -- confirm before removing.  */
17257 if (!TARGET_64BIT && 0)
17259 for (i = 0; i < unroll; i++)
17264 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17266 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17268 emit_move_insn (destmem, srcmem);
/* Otherwise load all chunks into temporaries first, then store them,
   so loads and stores are not interleaved.  */
17274 gcc_assert (unroll <= 4);
17275 for (i = 0; i < unroll; i++)
17277 tmpreg[i] = gen_reg_rtx (mode);
17281 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17283 emit_move_insn (tmpreg[i], srcmem);
17285 for (i = 0; i < unroll; i++)
17290 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17292 emit_move_insn (destmem, tmpreg[i]);
/* Memset variant: store VALUE into each chunk.  */
17297 for (i = 0; i < unroll; i++)
17301 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17302 emit_move_insn (destmem, value);
17305 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17306 true, OPTAB_LIB_WIDEN);
17308 emit_move_insn (iter, tmp);
17310 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Predict the back-edge from the expected trip count when known.  */
17312 if (expected_size != -1)
17314 expected_size /= GET_MODE_SIZE (mode) * unroll;
17315 if (expected_size == 0)
17317 else if (expected_size > REG_BR_PROB_BASE)
17318 predict_jump (REG_BR_PROB_BASE - 1);
17320 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17323 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the caller.  */
17324 iter = ix86_zero_extend_to_Pmode (iter);
17325 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17326 true, OPTAB_LIB_WIDEN);
17327 if (tmp != destptr)
17328 emit_move_insn (destptr, tmp);
17331 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17332 true, OPTAB_LIB_WIDEN);
17334 emit_move_insn (srcptr, tmp);
17336 emit_label (out_label);
17339 /* Output "rep; mov" instruction.
17340 Arguments have same meaning as for previous function */
17342 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17343 rtx destptr, rtx srcptr,
17345 enum machine_mode mode)
17351 /* If the size is known, it is shorter to use rep movs. */
/* For byte copies with a known count divisible by 4, widen to SImode.  */
17352 if (mode == QImode && CONST_INT_P (count)
17353 && !(INTVAL (count) & 3))
/* Normalize both MEMs to BLKmode at their pointer registers.  */
17356 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17357 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17358 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17359 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17360 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions: ptr + countreg << log2(size).  */
17361 if (mode != QImode)
17363 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17364 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17365 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17366 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17367 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17368 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17372 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17373 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Record the known transfer size on the MEMs for alias analysis.  */
17375 if (CONST_INT_P (count))
17377 count = GEN_INT (INTVAL (count)
17378 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17379 destmem = shallow_copy_rtx (destmem);
17380 srcmem = shallow_copy_rtx (srcmem);
17381 set_mem_size (destmem, count);
17382 set_mem_size (srcmem, count);
17386 if (MEM_SIZE (destmem))
17387 set_mem_size (destmem, NULL_RTX);
17388 if (MEM_SIZE (srcmem))
17389 set_mem_size (srcmem, NULL_RTX);
17391 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17395 /* Output "rep; stos" instruction.
17396 Arguments have same meaning as for previous function */
17398 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17399 rtx count, enum machine_mode mode,
17405 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17406 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17407 value = force_reg (mode, gen_lowpart (mode, value));
17408 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final-pointer expression: destptr + countreg << log2(size).  */
17409 if (mode != QImode)
17411 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17412 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17413 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17416 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a zero fill with known count lets us keep exact MEM_SIZE info.  */
17417 if (orig_value == const0_rtx && CONST_INT_P (count))
17419 count = GEN_INT (INTVAL (count)
17420 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17421 destmem = shallow_copy_rtx (destmem);
17422 set_mem_size (destmem, count);
17424 else if (MEM_SIZE (destmem))
17425 set_mem_size (destmem, NULL_RTX);
17426 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move of MODE size from SRCMEM+OFFSET to
   DESTMEM+OFFSET, auto-incrementing SRCPTR and DESTPTR.  */
17430 emit_strmov (rtx destmem, rtx srcmem,
17431 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17433 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17434 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17435 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17438 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17440 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17441 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit a straight-line sequence testing each bit of the
   residual count from 16 down to 1, widest moves first.  */
17444 if (CONST_INT_P (count))
17446 HOST_WIDE_INT countval = INTVAL (count);
17449 if ((countval & 0x10) && max_size > 16)
17453 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17454 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17457 gcc_unreachable ();
17460 if ((countval & 0x08) && max_size > 8)
17463 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17466 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17467 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17471 if ((countval & 0x04) && max_size > 4)
17473 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17476 if ((countval & 0x02) && max_size > 2)
17478 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17481 if ((countval & 0x01) && max_size > 1)
17483 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residue: fall back to a byte loop over count & (max-1).  */
17490 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17491 count, 1, OPTAB_DIRECT);
17492 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17493 count, QImode, 1, 4);
17497 /* When there are stringops, we can cheaply increase dest and src pointers.
17498 Otherwise we save code size by maintaining offset (zero is readily
17499 available from preceding rep operation) and using x86 addressing modes.
17501 if (TARGET_SINGLE_STRINGOP)
/* Variable residue with stringops: test bit 4/2/1 of COUNT and emit one
   conditional movs of decreasing width for each.  */
17505 rtx label = ix86_expand_aligntest (count, 4, true);
17506 src = change_address (srcmem, SImode, srcptr);
17507 dest = change_address (destmem, SImode, destptr);
17508 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17509 emit_label (label);
17510 LABEL_NUSES (label) = 1;
17514 rtx label = ix86_expand_aligntest (count, 2, true);
17515 src = change_address (srcmem, HImode, srcptr);
17516 dest = change_address (destmem, HImode, destptr);
17517 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17518 emit_label (label);
17519 LABEL_NUSES (label) = 1;
17523 rtx label = ix86_expand_aligntest (count, 1, true);
17524 src = change_address (srcmem, QImode, srcptr);
17525 dest = change_address (destmem, QImode, destptr);
17526 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17527 emit_label (label);
17528 LABEL_NUSES (label) = 1;
/* No stringops: keep a running OFFSET register instead of bumping
   the pointers, and use base+offset addressing.  */
17533 rtx offset = force_reg (Pmode, const0_rtx);
17538 rtx label = ix86_expand_aligntest (count, 4, true);
17539 src = change_address (srcmem, SImode, srcptr);
17540 dest = change_address (destmem, SImode, destptr);
17541 emit_move_insn (dest, src);
17542 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17543 true, OPTAB_LIB_WIDEN);
17545 emit_move_insn (offset, tmp);
17546 emit_label (label);
17547 LABEL_NUSES (label) = 1;
17551 rtx label = ix86_expand_aligntest (count, 2, true);
17552 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17553 src = change_address (srcmem, HImode, tmp);
17554 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17555 dest = change_address (destmem, HImode, tmp);
17556 emit_move_insn (dest, src);
17557 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17558 true, OPTAB_LIB_WIDEN);
17560 emit_move_insn (offset, tmp);
17561 emit_label (label);
17562 LABEL_NUSES (label) = 1;
17566 rtx label = ix86_expand_aligntest (count, 1, true);
17567 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17568 src = change_address (srcmem, QImode, tmp);
17569 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17570 dest = change_address (destmem, QImode, tmp);
17571 emit_move_insn (dest, src);
17572 emit_label (label);
17573 LABEL_NUSES (label) = 1;
17578 /* Output code to set at most count & (max_size - 1) bytes starting by DEST.
   Loop-based fallback: masks COUNT to the residue and emits a QImode
   store loop via expand_set_or_movmem_via_loop.  */
17580 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17581 rtx count, int max_size)
17584 expand_simple_binop (counter_mode (count), AND, count,
17585 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17586 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17587 gen_lowpart (QImode, value), count, QImode,
17591 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
17593 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant count: straight-line stores testing each residual bit,
   widest first.  VALUE is assumed pre-broadcast to the widest mode
   used here -- TODO(review): confirm against the caller.  */
17597 if (CONST_INT_P (count))
17599 HOST_WIDE_INT countval = INTVAL (count);
17602 if ((countval & 0x10) && max_size > 16)
17606 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17607 emit_insn (gen_strset (destptr, dest, value));
17608 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17609 emit_insn (gen_strset (destptr, dest, value));
17612 gcc_unreachable ();
17615 if ((countval & 0x08) && max_size > 8)
17619 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17620 emit_insn (gen_strset (destptr, dest, value));
17624 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17625 emit_insn (gen_strset (destptr, dest, value));
17626 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17627 emit_insn (gen_strset (destptr, dest, value));
17631 if ((countval & 0x04) && max_size > 4)
17633 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17634 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17637 if ((countval & 0x02) && max_size > 2)
17639 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17640 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17643 if ((countval & 0x01) && max_size > 1)
17645 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17646 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residue: fall back to the byte loop.  */
17653 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residue: test bits 16/8/4/2/1 of COUNT and conditionally store.  */
17658 rtx label = ix86_expand_aligntest (count, 16, true);
17661 dest = change_address (destmem, DImode, destptr);
17662 emit_insn (gen_strset (destptr, dest, value));
17663 emit_insn (gen_strset (destptr, dest, value));
17667 dest = change_address (destmem, SImode, destptr);
17668 emit_insn (gen_strset (destptr, dest, value));
17669 emit_insn (gen_strset (destptr, dest, value));
17670 emit_insn (gen_strset (destptr, dest, value));
17671 emit_insn (gen_strset (destptr, dest, value));
17673 emit_label (label);
17674 LABEL_NUSES (label) = 1;
17678 rtx label = ix86_expand_aligntest (count, 8, true);
17681 dest = change_address (destmem, DImode, destptr);
17682 emit_insn (gen_strset (destptr, dest, value));
17686 dest = change_address (destmem, SImode, destptr);
17687 emit_insn (gen_strset (destptr, dest, value));
17688 emit_insn (gen_strset (destptr, dest, value));
17690 emit_label (label);
17691 LABEL_NUSES (label) = 1;
17695 rtx label = ix86_expand_aligntest (count, 4, true);
17696 dest = change_address (destmem, SImode, destptr);
17697 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17698 emit_label (label);
17699 LABEL_NUSES (label) = 1;
17703 rtx label = ix86_expand_aligntest (count, 2, true);
17704 dest = change_address (destmem, HImode, destptr);
17705 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17706 emit_label (label);
17707 LABEL_NUSES (label) = 1;
17711 rtx label = ix86_expand_aligntest (count, 1, true);
17712 dest = change_address (destmem, QImode, destptr);
17713 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17714 emit_label (label);
17715 LABEL_NUSES (label) = 1;
17719 /* Copy enough from DEST to SRC to align DEST known to be aligned by ALIGN to
17720 DESIRED_ALIGNMENT. */
17722 expand_movmem_prologue (rtx destmem, rtx srcmem,
17723 rtx destptr, rtx srcptr, rtx count,
17724 int align, int desired_alignment)
/* Each step conditionally copies one piece (byte, then halfword, then
   word) when DESTPTR has the corresponding low bit set, and decrements
   COUNT accordingly.  */
17726 if (align <= 1 && desired_alignment > 1)
17728 rtx label = ix86_expand_aligntest (destptr, 1, false);
17729 srcmem = change_address (srcmem, QImode, srcptr);
17730 destmem = change_address (destmem, QImode, destptr);
17731 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17732 ix86_adjust_counter (count, 1);
17733 emit_label (label);
17734 LABEL_NUSES (label) = 1;
17736 if (align <= 2 && desired_alignment > 2)
17738 rtx label = ix86_expand_aligntest (destptr, 2, false);
17739 srcmem = change_address (srcmem, HImode, srcptr);
17740 destmem = change_address (destmem, HImode, destptr);
17741 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17742 ix86_adjust_counter (count, 2);
17743 emit_label (label);
17744 LABEL_NUSES (label) = 1;
17746 if (align <= 4 && desired_alignment > 4)
17748 rtx label = ix86_expand_aligntest (destptr, 4, false);
17749 srcmem = change_address (srcmem, SImode, srcptr);
17750 destmem = change_address (destmem, SImode, destptr);
17751 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17752 ix86_adjust_counter (count, 4);
17753 emit_label (label);
17754 LABEL_NUSES (label) = 1;
/* Only alignments up to 8 bytes are handled by this prologue.  */
17756 gcc_assert (desired_alignment <= 8);
17759 /* Copy enough bytes from SRC to DST so that DST becomes aligned to
17760    DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be copied. */
/* Constant-count variant of expand_movmem_prologue: ALIGN_BYTES is known at
   compile time, so emit exactly the 1/2/4-byte moves its low bits request,
   keeping MEM alignment and size info accurate for the alias machinery.
   *SRCP is updated to the adjusted source MEM; returns the adjusted DST.
   SRC alignment is raised only when its misalignment provably matches the
   destination's (same low bits of src_align_bytes and align_bytes).  */
17762 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17763 int desired_align, int align_bytes)
17766 rtx src_size, dst_size;
17768 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17769 if (src_align_bytes >= 0)
17770 src_align_bytes = desired_align - src_align_bytes;
17771 src_size = MEM_SIZE (src);
17772 dst_size = MEM_SIZE (dst);
17773 if (align_bytes & 1)
17775 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17776 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17778 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17780 if (align_bytes & 2)
17782 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17783 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17784 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17785 set_mem_align (dst, 2 * BITS_PER_UNIT);
17786 if (src_align_bytes >= 0
17787 && (src_align_bytes & 1) == (align_bytes & 1)
17788 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17789 set_mem_align (src, 2 * BITS_PER_UNIT);
17791 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17793 if (align_bytes & 4)
17795 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17796 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17797 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17798 set_mem_align (dst, 4 * BITS_PER_UNIT);
17799 if (src_align_bytes >= 0)
17801 unsigned int src_align = 0;
17802 if ((src_align_bytes & 3) == (align_bytes & 3))
17804 else if ((src_align_bytes & 1) == (align_bytes & 1))
17806 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17807 set_mem_align (src, src_align * BITS_PER_UNIT);
17810 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Switch both MEMs back to BLKmode for the main copying loop and record
   the alignment that the prologue just established.  */
17812 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17813 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17814 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17815 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17816 if (src_align_bytes >= 0)
17818 unsigned int src_align = 0;
17819 if ((src_align_bytes & 7) == (align_bytes & 7))
17821 else if ((src_align_bytes & 3) == (align_bytes & 3))
17823 else if ((src_align_bytes & 1) == (align_bytes & 1))
17825 if (src_align > (unsigned int) desired_align)
17826 src_align = desired_align;
17827 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17828 set_mem_align (src, src_align * BITS_PER_UNIT);
17831 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* Fix: shrink the SOURCE's recorded size here, not DST's again — the
   original passed DST with SRC_SIZE, corrupting both MEMs' size info.  */
17833 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17838 /* Store enough bytes at DEST so that DEST, known to be aligned to ALIGN,
17839    becomes aligned to DESIRED_ALIGNMENT. */
/* Emit a jump-tree prologue storing 1-, 2- and 4-byte pieces of VALUE at
   DESTMEM until DESTPTR reaches DESIRED_ALIGNMENT.  Mirrors
   expand_movmem_prologue: each step is guarded by an aligntest label and
   COUNT is decremented for every byte stored.  VALUE must already hold the
   byte pattern replicated to register width (see promote_duplicated_reg).  */
17841 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17842 int align, int desired_alignment)
17844 if (align <= 1 && desired_alignment > 1)
17846 rtx label = ix86_expand_aligntest (destptr, 1, false);
17847 destmem = change_address (destmem, QImode, destptr);
17848 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17849 ix86_adjust_counter (count, 1);
17850 emit_label (label);
17851 LABEL_NUSES (label) = 1;
17853 if (align <= 2 && desired_alignment > 2)
17855 rtx label = ix86_expand_aligntest (destptr, 2, false);
17856 destmem = change_address (destmem, HImode, destptr);
17857 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17858 ix86_adjust_counter (count, 2);
17859 emit_label (label);
17860 LABEL_NUSES (label) = 1;
17862 if (align <= 4 && desired_alignment > 4)
17864 rtx label = ix86_expand_aligntest (destptr, 4, false);
17865 destmem = change_address (destmem, SImode, destptr);
17866 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17867 ix86_adjust_counter (count, 4);
17868 emit_label (label);
17869 LABEL_NUSES (label) = 1;
/* Larger desired alignments would need an 8-byte step; not supported.  */
17871 gcc_assert (desired_alignment <= 8);
17874 /* Store enough bytes at DST so that DST, known to be aligned to ALIGN,
17875    becomes aligned to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need
17875    to be stored. */
/* Constant-count variant of expand_setmem_prologue: ALIGN_BYTES is known
   at compile time, so emit exactly the 1/2/4-byte stores its low bits
   request, keeping MEM alignment and size info accurate for aliasing.
   Returns the adjusted DST.  VALUE must already hold the replicated byte
   pattern (gen_lowpart extracts the piece of the needed width).  */
17877 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17878 int desired_align, int align_bytes)
17881 rtx dst_size = MEM_SIZE (dst);
17882 if (align_bytes & 1)
17884 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17886 emit_insn (gen_strset (destreg, dst,
17887 gen_lowpart (QImode, value)));
17889 if (align_bytes & 2)
17891 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17892 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17893 set_mem_align (dst, 2 * BITS_PER_UNIT);
17895 emit_insn (gen_strset (destreg, dst,
17896 gen_lowpart (HImode, value)));
17898 if (align_bytes & 4)
17900 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17901 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17902 set_mem_align (dst, 4 * BITS_PER_UNIT);
17904 emit_insn (gen_strset (destreg, dst,
17905 gen_lowpart (SImode, value)));
/* Back to BLKmode for the main loop; record the achieved alignment and
   the reduced remaining size.  */
17907 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17908 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17909 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17911 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17915 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   Returns the chosen algorithm; sets *DYNAMIC_CHECK to a size threshold
   (or -1) at which the expansion should fall back to a library call.
   MEMSET selects between the memset and memcpy cost tables.  */
17916 static enum stringop_alg
17917 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17918 int *dynamic_check)
17920 const struct stringop_algs * algs;
17921 bool optimize_for_speed;
17922 /* Algorithms using the rep prefix want at least edi and ecx;
17923 additionally, memset wants eax and memcpy wants esi. Don't
17924 consider such algorithms if the user has appropriated those
17925 registers for their own purposes. */
17926 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17928 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17930 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17931 || (alg != rep_prefix_1_byte \
17932 && alg != rep_prefix_4_byte \
17933 && alg != rep_prefix_8_byte))
17934 const struct processor_costs *cost;
17936 /* Even if the string operation call is cold, we still might spend a lot
17937 of time processing large blocks. */
17938 if (optimize_function_for_size_p (cfun)
17939 || (optimize_insn_for_size_p ()
17940 && expected_size != -1 && expected_size < 256))
17941 optimize_for_speed = false;
17943 optimize_for_speed = true;
17945 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17947 *dynamic_check = -1;
17949 algs = &cost->memset[TARGET_64BIT != 0];
17951 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins if its registers are available.  */
17952 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17953 return stringop_alg;
17954 /* rep; movq or rep; movl is the smallest variant. */
17955 else if (!optimize_for_speed)
17957 if (!count || (count & 3))
17958 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17960 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17962 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
17964 else if (expected_size != -1 && expected_size < 4)
17965 return loop_1_byte;
17966 else if (expected_size != -1)
17969 enum stringop_alg alg = libcall;
/* Fix: the size-table bound macro is MAX_STRINGOP_ALGS; "NAX_..." is an
   undeclared identifier and would not compile.  */
17970 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17972 /* We get here if the algorithms that were not libcall-based
17973 were rep-prefix based and we are unable to use rep prefixes
17974 based on global register usage. Break out of the loop and
17975 use the heuristic below. */
17976 if (algs->size[i].max == 0)
17978 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17980 enum stringop_alg candidate = algs->size[i].alg;
17982 if (candidate != libcall && ALG_USABLE_P (candidate))
17984 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17985 last non-libcall inline algorithm. */
17986 if (TARGET_INLINE_ALL_STRINGOPS)
17988 /* When the current size is best to be copied by a libcall,
17989 but we are still forced to inline, run the heuristic below
17990 that will pick code for medium sized blocks. */
17991 if (alg != libcall)
17995 else if (ALG_USABLE_P (candidate))
17999 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18001 /* When asked to inline the call anyway, try to pick meaningful choice.
18002 We look for maximal size of block that is faster to copy by hand and
18003 take blocks of at most of that size guessing that average size will
18004 be roughly half of the block.
18006 If this turns out to be bad, we might simply specify the preferred
18007 choice in ix86_costs. */
18008 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18009 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18012 enum stringop_alg alg;
18014 bool any_alg_usable_p = true;
/* Same fix as above: MAX_STRINGOP_ALGS.  */
18016 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18018 enum stringop_alg candidate = algs->size[i].alg;
18019 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18021 if (candidate != libcall && candidate
18022 && ALG_USABLE_P (candidate))
18023 max = algs->size[i].max;
18025 /* If there aren't any usable algorithms, then recursing on
18026 smaller sizes isn't going to find anything. Just return the
18027 simple byte-at-a-time copy loop. */
18028 if (!any_alg_usable_p)
18030 /* Pick something reasonable. */
18031 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18032 *dynamic_check = 128;
18033 return loop_1_byte;
/* Recurse with half the usable maximum as the expected size; the result
   must be a usable inline algorithm (asserted below).  */
18037 alg = decide_alg (count, max / 2, memset, dynamic_check);
18038 gcc_assert (*dynamic_check == -1);
18039 gcc_assert (alg != libcall);
18040 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18041 *dynamic_check = max;
18044 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18045 #undef ALG_USABLE_P
18048 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18049 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the alignment the prologue should establish for ALG; falls back
   to ALIGN itself when the block is tiny (aligning would cost more than it
   saves) or when ALIGN already meets the requirement.  */
18051 decide_alignment (int align,
18052 enum stringop_alg alg,
18055 int desired_align = 0;
18059 gcc_unreachable ();
18061 case unrolled_loop:
18062 desired_align = GET_MODE_SIZE (Pmode);
18064 case rep_prefix_8_byte:
18067 case rep_prefix_4_byte:
18068 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18069 copying whole cacheline at once. */
18070 if (TARGET_PENTIUMPRO)
18075 case rep_prefix_1_byte:
18076 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18077 copying whole cacheline at once. */
18078 if (TARGET_PENTIUMPRO)
/* Never ask for less than what is already guaranteed, and don't bother
   aligning blocks expected to be shorter than 4 bytes.  */
18092 if (desired_align < align)
18093 desired_align = align;
18094 if (expected_size != -1 && expected_size < 4)
18095 desired_align = align;
18096 return desired_align;
18099 /* Return the smallest power of 2 greater than VAL. */
/* Used to round epilogue_size_needed up to a power of two so the epilogue
   can mask the residual count (see the callers in ix86_expand_movmem and
   ix86_expand_setmem).  */
18101 smallest_pow2_greater_than (int val)
18109 /* Expand string move (memcpy) operation. Use i386 string operations when
18110 profitable. expand_setmem contains similar code. The code depends upon
18111 architecture, block size and alignment, but always has the same
18114 1) Prologue guard: Conditional that jumps up to epilogues for small
18115 blocks that can be handled by epilogue alone. This is faster but
18116 also needed for correctness, since prologue assume the block is larger
18117 than the desired alignment.
18119 Optional dynamic check for size and libcall for large
18120 blocks is emitted here too, with -minline-stringops-dynamically.
18122 2) Prologue: copy first few bytes in order to get destination aligned
18123 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18124 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18125 We emit either a jump tree on power of two sized blocks, or a byte loop.
18127 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18128 with specified algorithm.
18130 4) Epilogue: code copying tail of the block that is too small to be
18131 handled by main body (or up to size guarded by prologue guard). */
/* Top-level memcpy expander (see the four-step comment above): chooses an
   algorithm via decide_alg, emits the small-block guard, the alignment
   prologue, the main copy loop/rep insn, and the masked-count epilogue.
   Returns nonzero on success (zero means: fall back to a libcall).  */
18134 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18135 rtx expected_align_exp, rtx expected_size_exp)
18141 rtx jump_around_label = NULL;
18142 HOST_WIDE_INT align = 1;
18143 unsigned HOST_WIDE_INT count = 0;
18144 HOST_WIDE_INT expected_size = -1;
18145 int size_needed = 0, epilogue_size_needed;
18146 int desired_align = 0, align_bytes = 0;
18147 enum stringop_alg alg;
18149 bool need_zero_guard = false;
18151 if (CONST_INT_P (align_exp))
18152 align = INTVAL (align_exp);
18153 /* i386 can do misaligned access on reasonably increased cost. */
18154 if (CONST_INT_P (expected_align_exp)
18155 && INTVAL (expected_align_exp) > align)
18156 align = INTVAL (expected_align_exp);
18157 /* ALIGN is the minimum of destination and source alignment, but we care here
18158 just about destination alignment. */
18159 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18160 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18162 if (CONST_INT_P (count_exp))
18163 count = expected_size = INTVAL (count_exp);
18164 if (CONST_INT_P (expected_size_exp) && count == 0)
18165 expected_size = INTVAL (expected_size_exp);
18167 /* Make sure we don't need to care about overflow later on. */
18168 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18171 /* Step 0: Decide on preferred algorithm, desired alignment and
18172 size of chunks to be copied by main loop. */
18174 alg = decide_alg (count, expected_size, false, &dynamic_check);
18175 desired_align = decide_alignment (align, alg, expected_size);
18177 if (!TARGET_ALIGN_STRINGOPS)
18178 align = desired_align;
18180 if (alg == libcall)
18182 gcc_assert (alg != no_stringop);
18184 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18185 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18186 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED = bytes each main-loop iteration handles; loop algorithms
   additionally need a zero-trip guard since the loop body runs at least
   once (visible from the need_zero_guard uses below).  */
18191 gcc_unreachable ();
18193 need_zero_guard = true;
18194 size_needed = GET_MODE_SIZE (Pmode);
18196 case unrolled_loop:
18197 need_zero_guard = true;
18198 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18200 case rep_prefix_8_byte:
18203 case rep_prefix_4_byte:
18206 case rep_prefix_1_byte:
18210 need_zero_guard = true;
18215 epilogue_size_needed = size_needed;
18217 /* Step 1: Prologue guard. */
18219 /* Alignment code needs count to be in register. */
18220 if (CONST_INT_P (count_exp) && desired_align > align)
18222 if (INTVAL (count_exp) > desired_align
18223 && INTVAL (count_exp) > size_needed)
18226 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18227 if (align_bytes <= 0)
18230 align_bytes = desired_align - align_bytes;
18232 if (align_bytes == 0)
18233 count_exp = force_reg (counter_mode (count_exp), count_exp);
18235 gcc_assert (desired_align >= 1 && align >= 1);
18237 /* Ensure that alignment prologue won't copy past end of block. */
18238 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18240 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18241 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18242 Make sure it is power of 2. */
18243 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18247 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18249 /* If main algorithm works on QImode, no epilogue is needed.
18250 For small sizes just don't align anything. */
18251 if (size_needed == 1)
18252 desired_align = align;
/* Runtime guard: blocks smaller than EPILOGUE_SIZE_NEEDED jump straight
   to the epilogue (label emitted in step 4).  */
18259 label = gen_label_rtx ();
18260 emit_cmp_and_jump_insns (count_exp,
18261 GEN_INT (epilogue_size_needed),
18262 LTU, 0, counter_mode (count_exp), 1, label);
18263 if (expected_size == -1 || expected_size < epilogue_size_needed)
18264 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18266 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18270 /* Emit code to decide on runtime whether library call or inline should be
18272 if (dynamic_check != -1)
18274 if (CONST_INT_P (count_exp))
18276 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18278 emit_block_move_via_libcall (dst, src, count_exp, false);
18279 count_exp = const0_rtx;
18285 rtx hot_label = gen_label_rtx ();
18286 jump_around_label = gen_label_rtx ();
18287 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18288 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18289 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18290 emit_block_move_via_libcall (dst, src, count_exp, false);
18291 emit_jump (jump_around_label);
18292 emit_label (hot_label);
18296 /* Step 2: Alignment prologue. */
18298 if (desired_align > align)
18300 if (align_bytes == 0)
18302 /* Except for the first move in epilogue, we no longer know
18303 constant offset in aliasing info. It doesn't seem worth
18304 the pain to maintain it for the first move, so throw away
18306 src = change_address (src, BLKmode, srcreg);
18307 dst = change_address (dst, BLKmode, destreg);
18308 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18313 /* If we know how many bytes need to be stored before dst is
18314 sufficiently aligned, maintain aliasing info accurately. */
18315 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18316 desired_align, align_bytes);
18317 count_exp = plus_constant (count_exp, -align_bytes);
18318 count -= align_bytes;
18320 if (need_zero_guard
18321 && (count < (unsigned HOST_WIDE_INT) size_needed
18322 || (align_bytes == 0
18323 && count < ((unsigned HOST_WIDE_INT) size_needed
18324 + desired_align - align))))
18326 /* It is possible that we copied enough so the main loop will not
18328 gcc_assert (size_needed > 1);
18329 if (label == NULL_RTX)
18330 label = gen_label_rtx ();
18331 emit_cmp_and_jump_insns (count_exp,
18332 GEN_INT (size_needed),
18333 LTU, 0, counter_mode (count_exp), 1, label);
18334 if (expected_size == -1
18335 || expected_size < (desired_align - align) / 2 + size_needed)
18336 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18338 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18341 if (label && size_needed == 1)
18343 emit_label (label);
18344 LABEL_NUSES (label) = 1;
18346 epilogue_size_needed = 1;
18348 else if (label == NULL_RTX)
18349 epilogue_size_needed = size_needed;
18351 /* Step 3: Main loop. */
18357 gcc_unreachable ();
18359 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18360 count_exp, QImode, 1, expected_size);
18363 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18364 count_exp, Pmode, 1, expected_size);
18366 case unrolled_loop:
18367 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18368 registers for 4 temporaries anyway. */
18369 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18370 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18373 case rep_prefix_8_byte:
18374 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18377 case rep_prefix_4_byte:
18378 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18381 case rep_prefix_1_byte:
18382 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18386 /* Adjust properly the offset of src and dest memory for aliasing. */
18387 if (CONST_INT_P (count_exp))
18389 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18390 (count / size_needed) * size_needed);
18391 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18392 (count / size_needed) * size_needed);
18396 src = change_address (src, BLKmode, srcreg);
18397 dst = change_address (dst, BLKmode, destreg);
18400 /* Step 4: Epilogue to copy the remaining bytes. */
18404 /* When the main loop is done, COUNT_EXP might hold original count,
18405 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18406 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18407 bytes. Compensate if needed. */
18409 if (size_needed < epilogue_size_needed)
18412 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18413 GEN_INT (size_needed - 1), count_exp, 1,
18415 if (tmp != count_exp)
18416 emit_move_insn (count_exp, tmp);
18418 emit_label (label);
18419 LABEL_NUSES (label) = 1;
18422 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18423 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18424 epilogue_size_needed);
18425 if (jump_around_label)
18426 emit_label (jump_around_label);
18430 /* Helper function for memset expansion. For QImode value 0xXY produce
18431 0xXYXYXYXY of wide specified by MODE. This is essentially
18432 a * 0x10101010, but we can do slightly better than
18433 synth_mult by unwinding the sequence by hand on CPUs with
/* slow multiply (the shift/IOR sequence below).  Used by
   ix86_expand_setmem via promote_duplicated_reg_to_size.  */
18436 promote_duplicated_reg (enum machine_mode mode, rtx val)
18438 enum machine_mode valmode = GET_MODE (val);
18440 int nops = mode == DImode ? 3 : 2;
18442 gcc_assert (mode == SImode || mode == DImode);
18443 if (val == const0_rtx)
18444 return copy_to_mode_reg (mode, const0_rtx);
18445 if (CONST_INT_P (val))
/* Constant byte: replicate it at compile time.  */
18447 HOST_WIDE_INT v = INTVAL (val) & 255;
18451 if (mode == DImode)
18452 v |= (v << 16) << 16;
18453 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18456 if (valmode == VOIDmode)
18458 if (valmode != QImode)
18459 val = gen_lowpart (QImode, val);
18460 if (mode == QImode)
18462 if (!TARGET_PARTIAL_REG_STALL)
/* Prefer a multiply by the promoted 0x01..01 constant when the cost
   tables say it beats the shift/IOR unwinding.  */
18464 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18465 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18466 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18467 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18469 rtx reg = convert_modes (mode, QImode, val, true);
18470 tmp = promote_duplicated_reg (mode, const1_rtx);
18471 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18476 rtx reg = convert_modes (mode, QImode, val, true);
18478 if (!TARGET_PARTIAL_REG_STALL)
18479 if (mode == SImode)
18480 emit_insn (gen_movsi_insv_1 (reg, reg));
18482 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18485 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18486 NULL, 1, OPTAB_DIRECT);
18488 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18490 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18491 NULL, 1, OPTAB_DIRECT);
18492 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Fix: the final 32-bit shift step applies only to DImode; the original
   condition "mode == SImode" would shift a 32-bit register by its full
   width.  MODE is SImode or DImode here (asserted above).  */
18493 if (mode != SImode)
18495 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18496 NULL, 1, OPTAB_DIRECT);
18497 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18502 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18503 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18504 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode any consumer (main loop or alignment prologue)
   will extract via gen_lowpart; returns VAL unchanged when one byte
   suffices.  */
18506 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18511 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18512 promoted_val = promote_duplicated_reg (DImode, val);
18513 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18514 promoted_val = promote_duplicated_reg (SImode, val);
18515 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18516 promoted_val = promote_duplicated_reg (HImode, val);
18518 promoted_val = val;
18520 return promoted_val;
18523 /* Expand string clear operation (bzero). Use i386 string operations when
18524 profitable. See expand_movmem comment for explanation of individual
18525 steps performed. */
/* Differs from ix86_expand_movmem mainly in the VAL promotion: the byte
   value is replicated to register width (promote_duplicated_reg_to_size)
   so wide stores can be used; a loopy byte epilogue is forced when the
   promoted value is not available there.  */
18527 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18528 rtx expected_align_exp, rtx expected_size_exp)
18533 rtx jump_around_label = NULL;
18534 HOST_WIDE_INT align = 1;
18535 unsigned HOST_WIDE_INT count = 0;
18536 HOST_WIDE_INT expected_size = -1;
18537 int size_needed = 0, epilogue_size_needed;
18538 int desired_align = 0, align_bytes = 0;
18539 enum stringop_alg alg;
18540 rtx promoted_val = NULL;
18541 bool force_loopy_epilogue = false;
18543 bool need_zero_guard = false;
18545 if (CONST_INT_P (align_exp))
18546 align = INTVAL (align_exp);
18547 /* i386 can do misaligned access on reasonably increased cost. */
18548 if (CONST_INT_P (expected_align_exp)
18549 && INTVAL (expected_align_exp) > align)
18550 align = INTVAL (expected_align_exp);
18551 if (CONST_INT_P (count_exp))
18552 count = expected_size = INTVAL (count_exp);
18553 if (CONST_INT_P (expected_size_exp) && count == 0)
18554 expected_size = INTVAL (expected_size_exp);
18556 /* Make sure we don't need to care about overflow later on. */
18557 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18560 /* Step 0: Decide on preferred algorithm, desired alignment and
18561 size of chunks to be copied by main loop. */
18563 alg = decide_alg (count, expected_size, true, &dynamic_check);
18564 desired_align = decide_alignment (align, alg, expected_size);
18566 if (!TARGET_ALIGN_STRINGOPS)
18567 align = desired_align;
18569 if (alg == libcall)
18571 gcc_assert (alg != no_stringop);
18573 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18574 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18579 gcc_unreachable ();
18581 need_zero_guard = true;
18582 size_needed = GET_MODE_SIZE (Pmode);
18584 case unrolled_loop:
18585 need_zero_guard = true;
18586 size_needed = GET_MODE_SIZE (Pmode) * 4;
18588 case rep_prefix_8_byte:
18591 case rep_prefix_4_byte:
18594 case rep_prefix_1_byte:
18598 need_zero_guard = true;
18602 epilogue_size_needed = size_needed;
18604 /* Step 1: Prologue guard. */
18606 /* Alignment code needs count to be in register. */
18607 if (CONST_INT_P (count_exp) && desired_align > align)
18609 if (INTVAL (count_exp) > desired_align
18610 && INTVAL (count_exp) > size_needed)
18613 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18614 if (align_bytes <= 0)
18617 align_bytes = desired_align - align_bytes;
18619 if (align_bytes == 0)
/* Use a narrower counter register when the constant count fits in
   32 bits even on 64-bit targets.  */
18621 enum machine_mode mode = SImode;
18622 if (TARGET_64BIT && (count & ~0xffffffff))
18624 count_exp = force_reg (mode, count_exp);
18627 /* Do the cheap promotion to allow better CSE across the
18628 main loop and epilogue (ie one load of the big constant in the
18629 front of all code. */
18630 if (CONST_INT_P (val_exp))
18631 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18632 desired_align, align);
18633 /* Ensure that alignment prologue won't copy past end of block. */
18634 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18636 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18637 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18638 Make sure it is power of 2. */
18639 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18641 /* To improve performance of small blocks, we jump around the VAL
18642 promoting mode. This mean that if the promoted VAL is not constant,
18643 we might not use it in the epilogue and have to use byte
18645 if (epilogue_size_needed > 2 && !promoted_val)
18646 force_loopy_epilogue = true;
18649 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18651 /* If main algorithm works on QImode, no epilogue is needed.
18652 For small sizes just don't align anything. */
18653 if (size_needed == 1)
18654 desired_align = align;
18661 label = gen_label_rtx ();
18662 emit_cmp_and_jump_insns (count_exp,
18663 GEN_INT (epilogue_size_needed),
18664 LTU, 0, counter_mode (count_exp), 1, label);
18665 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18666 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18668 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18671 if (dynamic_check != -1)
18673 rtx hot_label = gen_label_rtx ();
18674 jump_around_label = gen_label_rtx ();
18675 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18676 LEU, 0, counter_mode (count_exp), 1, hot_label);
18677 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18678 set_storage_via_libcall (dst, count_exp, val_exp, false);
18679 emit_jump (jump_around_label);
18680 emit_label (hot_label);
18683 /* Step 2: Alignment prologue. */
18685 /* Do the expensive promotion once we branched off the small blocks. */
18687 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18688 desired_align, align);
18689 gcc_assert (desired_align >= 1 && align >= 1);
18691 if (desired_align > align)
18693 if (align_bytes == 0)
18695 /* Except for the first move in epilogue, we no longer know
18696 constant offset in aliasing info. It doesn't seem worth
18697 the pain to maintain it for the first move, so throw away
18699 dst = change_address (dst, BLKmode, destreg);
18700 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18705 /* If we know how many bytes need to be stored before dst is
18706 sufficiently aligned, maintain aliasing info accurately. */
18707 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18708 desired_align, align_bytes);
18709 count_exp = plus_constant (count_exp, -align_bytes);
18710 count -= align_bytes;
18712 if (need_zero_guard
18713 && (count < (unsigned HOST_WIDE_INT) size_needed
18714 || (align_bytes == 0
18715 && count < ((unsigned HOST_WIDE_INT) size_needed
18716 + desired_align - align))))
18718 /* It is possible that we copied enough so the main loop will not
18720 gcc_assert (size_needed > 1);
18721 if (label == NULL_RTX)
18722 label = gen_label_rtx ();
18723 emit_cmp_and_jump_insns (count_exp,
18724 GEN_INT (size_needed),
18725 LTU, 0, counter_mode (count_exp), 1, label);
18726 if (expected_size == -1
18727 || expected_size < (desired_align - align) / 2 + size_needed)
18728 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18730 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18733 if (label && size_needed == 1)
18735 emit_label (label);
18736 LABEL_NUSES (label) = 1;
/* Byte-wide epilogue can use the unpromoted value directly.  */
18738 promoted_val = val_exp;
18739 epilogue_size_needed = 1;
18741 else if (label == NULL_RTX)
18742 epilogue_size_needed = size_needed;
18744 /* Step 3: Main loop. */
18750 gcc_unreachable ();
18752 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18753 count_exp, QImode, 1, expected_size);
18756 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18757 count_exp, Pmode, 1, expected_size);
18759 case unrolled_loop:
18760 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18761 count_exp, Pmode, 4, expected_size);
18763 case rep_prefix_8_byte:
18764 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18767 case rep_prefix_4_byte:
18768 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18771 case rep_prefix_1_byte:
18772 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18776 /* Adjust properly the offset of src and dest memory for aliasing. */
18777 if (CONST_INT_P (count_exp))
18778 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18779 (count / size_needed) * size_needed);
18781 dst = change_address (dst, BLKmode, destreg);
18783 /* Step 4: Epilogue to copy the remaining bytes. */
18787 /* When the main loop is done, COUNT_EXP might hold original count,
18788 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18789 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18790 bytes. Compensate if needed. */
18792 if (size_needed < epilogue_size_needed)
18795 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18796 GEN_INT (size_needed - 1), count_exp, 1,
18798 if (tmp != count_exp)
18799 emit_move_insn (count_exp, tmp);
18801 emit_label (label);
18802 LABEL_NUSES (label) = 1;
18805 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18807 if (force_loopy_epilogue)
18808 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18809 epilogue_size_needed);
18811 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18812 epilogue_size_needed);
18814 if (jump_around_label)
18815 emit_label (jump_around_label);
/* NOTE(review): this chunk is a sparse extract of i386.c -- the integer
   starting each line is a line-number artifact and many intermediate
   lines (braces, labels, arguments) are missing -- so only comments are
   added here; the code text is untouched.
   Visible purpose: emit the body of the unrolled strlen expansion.
   OUT holds the start address on entry and is advanced as the scan
   proceeds; SRC is the string MEM; ALIGN_RTX is the (possibly constant)
   alignment of the address.  The code first byte-checks up to three
   leading bytes to reach 4-byte alignment, then scans four bytes per
   iteration using the (x - 0x01010101) & ~x & 0x80808080 zero-byte
   trick, and finally locates the zero byte within the hit word using
   flag-based conditional moves / a carry-based subtract so the fixups
   are branchless.  */
18819 /* Expand the appropriate insns for doing strlen if not just doing
18822 out = result, initialized with the start address
18823 align_rtx = alignment of the address.
18824 scratch = scratch register, initialized with the startaddress when
18825 not aligned, otherwise undefined
18827 This is just the body. It needs the initializations mentioned above and
18828 some address computing at the end. These things are done in i386.md. */
18831 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18835 rtx align_2_label = NULL_RTX;
18836 rtx align_3_label = NULL_RTX;
18837 rtx align_4_label = gen_label_rtx ();
18838 rtx end_0_label = gen_label_rtx ();
18840 rtx tmpreg = gen_reg_rtx (SImode);
18841 rtx scratch = gen_reg_rtx (SImode);
18845 if (CONST_INT_P (align_rtx))
18846 align = INTVAL (align_rtx);
18848 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18850 /* Is there a known alignment and is it less than 4? */
18853 rtx scratch1 = gen_reg_rtx (Pmode);
18854 emit_move_insn (scratch1, out);
18855 /* Is there a known alignment and is it not 2? */
18858 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18859 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18861 /* Leave just the 3 lower bits. */
18862 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18863 NULL_RTX, 0, OPTAB_WIDEN);
18865 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18866 Pmode, 1, align_4_label);
18867 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18868 Pmode, 1, align_2_label);
18869 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18870 Pmode, 1, align_3_label);
18874 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18875 check if is aligned to 4 - byte. */
18877 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18878 NULL_RTX, 0, OPTAB_WIDEN);
18880 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18881 Pmode, 1, align_4_label);
18884 mem = change_address (src, QImode, out);
18886 /* Now compare the bytes. */
18888 /* Compare the first n unaligned byte on a byte per byte basis. */
18889 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18890 QImode, 1, end_0_label);
18892 /* Increment the address. */
18893 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18895 /* Not needed with an alignment of 2 */
18898 emit_label (align_2_label);
18900 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18903 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18905 emit_label (align_3_label);
18908 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18911 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18914 /* Generate loop to check 4 bytes at a time. It is not a good idea to
18915 align this loop. It gives only huge programs, but does not help to
18917 emit_label (align_4_label);
18919 mem = change_address (src, SImode, out);
18920 emit_move_insn (scratch, mem);
18921 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18923 /* This formula yields a nonzero result iff one of the bytes is zero.
18924 This saves three branches inside loop and many cycles. */
18926 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18927 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18928 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18929 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18930 gen_int_mode (0x80808080, SImode)));
18931 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18936 rtx reg = gen_reg_rtx (SImode);
18937 rtx reg2 = gen_reg_rtx (Pmode);
18938 emit_move_insn (reg, tmpreg);
18939 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18941 /* If zero is not in the first two bytes, move two bytes forward. */
18942 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18943 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18944 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18945 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18946 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18949 /* Emit lea manually to avoid clobbering of flags. */
18950 emit_insn (gen_rtx_SET (SImode, reg2,
18951 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18953 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18954 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18955 emit_insn (gen_rtx_SET (VOIDmode, out,
18956 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18963 rtx end_2_label = gen_label_rtx ();
18964 /* Is zero in the first two bytes? */
18966 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18967 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18968 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18969 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18970 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18972 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18973 JUMP_LABEL (tmp) = end_2_label;
18975 /* Not in the first two. Move two bytes forward. */
18976 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18977 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18979 emit_label (end_2_label);
18983 /* Avoid branch in fixing the byte. */
18984 tmpreg = gen_lowpart (QImode, tmpreg);
18985 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18986 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18987 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18989 emit_label (end_0_label);
/* NOTE(review): sparse extract -- leading integers are line-number
   artifacts and intermediate lines are missing; comments only added.
   Visible behavior: the strlen expander chooses one of two strategies.
   When TARGET_UNROLL_STRLEN applies (end-of-string char is 0, optimizing
   for speed, not -minline-all-stringops, alignment below 4), it emits
   the unrolled scan via ix86_expand_strlensi_unroll_1, which returns the
   address of the terminating zero, and subtracts the start address to
   get the length.  Otherwise it falls back to a repz-scasb style
   sequence (UNSPEC_SCAS / gen_strlenqi_1) followed by a one's-complement
   and add -1 to turn the scanned count into the length; that path is
   rejected up front if eax/ecx/edi have been fixed by the user.  */
18992 /* Expand strlen. */
18995 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18997 rtx addr, scratch1, scratch2, scratch3, scratch4;
18999 /* The generic case of strlen expander is long. Avoid it's
19000 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19002 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19003 && !TARGET_INLINE_ALL_STRINGOPS
19004 && !optimize_insn_for_size_p ()
19005 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19008 addr = force_reg (Pmode, XEXP (src, 0));
19009 scratch1 = gen_reg_rtx (Pmode);
19011 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19012 && !optimize_insn_for_size_p ())
19014 /* Well it seems that some optimizer does not combine a call like
19015 foo(strlen(bar), strlen(bar));
19016 when the move and the subtraction is done here. It does calculate
19017 the length just once when these instructions are done inside of
19018 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19019 often used and I use one fewer register for the lifetime of
19020 output_strlen_unroll() this is better. */
19022 emit_move_insn (out, addr);
19024 ix86_expand_strlensi_unroll_1 (out, src, align);
19026 /* strlensi_unroll_1 returns the address of the zero at the end of
19027 the string, like memchr(), so compute the length by subtracting
19028 the start address. */
19029 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19035 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19036 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19039 scratch2 = gen_reg_rtx (Pmode);
19040 scratch3 = gen_reg_rtx (Pmode);
19041 scratch4 = force_reg (Pmode, constm1_rtx);
19043 emit_move_insn (scratch3, addr);
19044 eoschar = force_reg (QImode, eoschar);
19046 src = replace_equiv_address_nv (src, scratch3);
19048 /* If .md starts supporting :P, this can be done in .md. */
19049 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19050 scratch4), UNSPEC_SCAS);
19051 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19052 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19053 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
/* NOTE(review): sparse extract; comments only.  Emits
     tmp = CONST (UNSPEC_PLTOFF (symbol));  tmp += pic_offset_table_rtx;
   i.e. computes the address of SYMBOL's PLT entry under the large
   x86-64 PIC code model (both preconditions asserted below).  The
   `return tmp;` falls in a missing line of this extract.  */
19058 /* For given symbol (function) construct code to compute address of it's PLT
19059 entry in large x86-64 PIC model. */
19061 construct_plt_address (rtx symbol)
19063 rtx tmp = gen_reg_rtx (Pmode);
19064 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19066 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19067 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19069 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19070 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx))
/* NOTE(review): sparse extract -- leading integers are line-number
   artifacts and intermediate lines are missing; comments only added.
   Visible steps of the call expander: Mach-O indirect-call rewriting;
   marking the PIC register as used for non-local symbols; loading AL
   with the SSE-register count for 64-bit varargs (callarg2 >= 0);
   routing large-PIC calls through construct_plt_address; forcing
   non-constant sibcall targets into R11 (a call-clobbered register not
   used for argument passing); wrapping the call in a PARALLEL with a
   stack-pointer adjustment when POP is given; and, for MS-ABI callers,
   attaching clobbers of XMM6-XMM15 plus SI/DI together with an
   UNSPEC_MS_TO_SYSV_CALL marker.  */
19075 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19077 rtx pop, int sibcall)
19079 rtx use = NULL, call;
19081 if (pop == const0_rtx)
19083 gcc_assert (!TARGET_64BIT || !pop);
19085 if (TARGET_MACHO && !TARGET_64BIT)
19088 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19089 fnaddr = machopic_indirect_call_target (fnaddr);
19094 /* Static functions and indirect calls don't need the pic register. */
19095 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19096 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19097 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19098 use_reg (&use, pic_offset_table_rtx);
19101 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19103 rtx al = gen_rtx_REG (QImode, AX_REG);
19104 emit_move_insn (al, callarg2);
19105 use_reg (&use, al);
19108 if (ix86_cmodel == CM_LARGE_PIC
19109 && GET_CODE (fnaddr) == MEM
19110 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19111 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19112 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19113 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
19115 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19116 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19118 if (sibcall && TARGET_64BIT
19119 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
19122 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19123 fnaddr = gen_rtx_REG (Pmode, R11_REG);
19124 emit_move_insn (fnaddr, addr);
19125 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19128 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19130 call = gen_rtx_SET (VOIDmode, retval, call);
19133 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19134 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19135 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19138 && ix86_cfun_abi () == MS_ABI
19139 && (!callarg2 || INTVAL (callarg2) != -2)
19141 /* We need to represent that SI and DI registers are clobbered
19143 static int clobbered_registers[] = {
19144 XMM6_REG, XMM7_REG, XMM8_REG,
19145 XMM9_REG, XMM10_REG, XMM11_REG,
19146 XMM12_REG, XMM13_REG, XMM14_REG,
19147 XMM15_REG, SI_REG, DI_REG
19150 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19151 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19152 UNSPEC_MS_TO_SYSV_CALL);
19156 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19157 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19160 (SSE_REGNO_P (clobbered_registers[i])
19162 clobbered_registers[i]));
19164 call = gen_rtx_PARALLEL (VOIDmode,
19165 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19169 call = emit_call_insn (call);
19171 CALL_INSN_FUNCTION_USAGE (call) = use;
/* NOTE(review): sparse extract; comments only.  Allocates a
   zero-initialized machine_function via GGC_CNEW and seeds the fields
   that must not start at zero: the cached fast-prologue register count
   (-1 = not computed yet), the TLS-descriptor-call flag, and the
   per-function calling ABI copied from the global ix86_abi.  The
   `return f;` falls in a missing line of this extract.  */
19175 /* Clear stack slot assignments remembered from previous functions.
19176 This is called from INIT_EXPANDERS once before RTL is emitted for each
19179 static struct machine_function *
19180 ix86_init_machine_status (void)
19182 struct machine_function *f;
19184 f = GGC_CNEW (struct machine_function);
19185 f->use_fast_prologue_epilogue_nregs = -1;
19186 f->tls_descriptor_call_expanded_p = 0;
19187 f->call_abi = ix86_abi;
/* NOTE(review): sparse extract; comments only.  Looks up an existing
   (mode, n) entry in the ix86_stack_locals list and returns a copy of
   its RTL; otherwise GGC-allocates a new stack_local_entry, assigns a
   fresh stack slot, and pushes it on the list.  copy_rtx is used so
   callers can modify the returned MEM without corrupting the cache.  */
19192 /* Return a MEM corresponding to a stack slot with mode MODE.
19193 Allocate a new slot if necessary.
19195 The RTL for a function can have several slots available: N is
19196 which slot to use. */
19199 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19201 struct stack_local_entry *s;
19203 gcc_assert (n < MAX_386_STACK_LOCALS);
19205 /* Virtual slot is valid only before vregs are instantiated. */
19206 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19208 for (s = ix86_stack_locals; s; s = s->next)
19209 if (s->mode == mode && s->n == n)
19210 return copy_rtx (s->rtl);
19212 s = (struct stack_local_entry *)
19213 ggc_alloc (sizeof (struct stack_local_entry));
19216 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19218 s->next = ix86_stack_locals;
19219 ix86_stack_locals = s;
/* NOTE(review): sparse extract; comments only.  Lazily builds and
   caches (in GC-rooted ix86_tls_symbol) the SYMBOL_REF for the TLS
   helper: "___tls_get_addr" under GNU TLS, "__tls_get_addr"
   otherwise.  */
19223 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19225 static GTY(()) rtx ix86_tls_symbol;
19227 ix86_tls_get_addr (void)
19230 if (!ix86_tls_symbol)
19232 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19233 (TARGET_ANY_GNU_TLS
19235 ? "___tls_get_addr"
19236 : "__tls_get_addr");
19239 return ix86_tls_symbol;
/* NOTE(review): sparse extract; comments only.  Lazily builds and
   caches (GC-rooted) the SYMBOL_REF for _TLS_MODULE_BASE_ and tags it
   with the global-dynamic TLS model in its symbol flags.  */
19242 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19244 static GTY(()) rtx ix86_tls_module_base_symbol;
19246 ix86_tls_module_base (void)
19249 if (!ix86_tls_module_base_symbol)
19251 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19252 "_TLS_MODULE_BASE_");
19253 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19254 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19257 return ix86_tls_module_base_symbol;
/* NOTE(review): sparse extract -- several lines (the `len` accumulator
   updates and returns) are missing; comments only added.  Computes the
   number of extra encoding bytes an address needs beyond the one-byte
   modrm/opcode/prefix: decomposes ADDR into base/index/disp, strips
   SUBREGs, and distinguishes register-indirect (esp needs a SIB byte,
   ebp needs a displacement byte), direct addressing, and the general
   base+index+disp forms (constraint K = signed 8-bit displacement).  */
19260 /* Calculate the length of the memory address in the instruction
19261 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19264 memory_address_length (rtx addr)
19266 struct ix86_address parts;
19267 rtx base, index, disp;
19271 if (GET_CODE (addr) == PRE_DEC
19272 || GET_CODE (addr) == POST_INC
19273 || GET_CODE (addr) == PRE_MODIFY
19274 || GET_CODE (addr) == POST_MODIFY)
19277 ok = ix86_decompose_address (addr, &parts);
19280 if (parts.base && GET_CODE (parts.base) == SUBREG)
19281 parts.base = SUBREG_REG (parts.base);
19282 if (parts.index && GET_CODE (parts.index) == SUBREG)
19283 parts.index = SUBREG_REG (parts.index);
19286 index = parts.index;
19291 - esp as the base always wants an index,
19292 - ebp as the base always wants a displacement. */
19294 /* Register Indirect. */
19295 if (base && !index && !disp)
19297 /* esp (for its index) and ebp (for its displacement) need
19298 the two-byte modrm form. */
19299 if (addr == stack_pointer_rtx
19300 || addr == arg_pointer_rtx
19301 || addr == frame_pointer_rtx
19302 || addr == hard_frame_pointer_rtx)
19306 /* Direct Addressing. */
19307 else if (disp && !base && !index)
19312 /* Find the length of the displacement constant. */
19315 if (base && satisfies_constraint_K (disp))
19320 /* ebp always wants a displacement. */
19321 else if (base == hard_frame_pointer_rtx)
19324 /* An index requires the two-byte modrm form.... */
19326 /* ...like esp, which always wants an index. */
19327 || base == stack_pointer_rtx
19328 || base == arg_pointer_rtx
19329 || base == frame_pointer_rtx)
/* NOTE(review): sparse extract; comments only.  Scans the cached
   operands for the (at most one) constant and returns its encoded size:
   1 byte when SHORTFORM is set and the constant fits constraint K
   (signed 8-bit), otherwise a size chosen by the insn's mode attribute
   (the switch body is largely missing here; per the surviving comment,
   DImode immediates are encoded as 32-bit sign-extended values).  */
19336 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19337 is set, expect that insn have 8bit immediate alternative. */
19339 ix86_attr_length_immediate_default (rtx insn, int shortform)
19343 extract_insn_cached (insn);
19344 for (i = recog_data.n_operands - 1; i >= 0; --i)
19345 if (CONSTANT_P (recog_data.operand[i]))
19348 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19352 switch (get_attr_mode (insn))
19363 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19368 fatal_insn ("unknown insn mode", insn);
/* NOTE(review): sparse extract; comments only.  For LEA insns, measures
   the address taken from SET_SRC of the (possibly PARALLEL-wrapped)
   pattern; otherwise measures the first MEM operand found among the
   cached operands.  Both paths delegate to memory_address_length; the
   fall-through return for insns with no MEM is in a missing line.  */
19374 /* Compute default value for "length_address" attribute. */
19376 ix86_attr_length_address_default (rtx insn)
19380 if (get_attr_type (insn) == TYPE_LEA)
19382 rtx set = PATTERN (insn);
19384 if (GET_CODE (set) == PARALLEL)
19385 set = XVECEXP (set, 0, 0);
19387 gcc_assert (GET_CODE (set) == SET);
19389 return memory_address_length (SET_SRC (set));
19392 extract_insn_cached (insn);
19393 for (i = recog_data.n_operands - 1; i >= 0; --i)
19394 if (MEM_P (recog_data.operand[i]))
19396 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* NOTE(review): sparse extract; comments only.  Decides between the
   2-byte and 3-byte VEX prefix (result includes one opcode byte, so the
   missing return lines presumably yield 3 vs. 4): a non-0f opcode or
   VEX.W forces the 3-byte form, as do DImode register operands (REX.W)
   and memory operands mentioning extended registers (REX.X/REX.B);
   32-bit mode can always use the 2-byte form.  */
19402 /* Compute default value for "length_vex" attribute. It includes
19403 2 or 3 byte VEX prefix and 1 opcode byte. */
19406 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19411 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19412 byte VEX prefix. */
19413 if (!has_0f_opcode || has_vex_w)
19416 /* We can always use 2 byte VEX prefix in 32bit. */
19420 extract_insn_cached (insn);
19422 for (i = recog_data.n_operands - 1; i >= 0; --i)
19423 if (REG_P (recog_data.operand[i]))
19425 /* REX.W bit uses 3 byte VEX prefix. */
19426 if (GET_MODE (recog_data.operand[i]) == DImode)
19431 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19432 if (MEM_P (recog_data.operand[i])
19433 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* NOTE(review): sparse extract; comments only.  Per-processor issue
   width for the scheduler, keyed on ix86_tune (the switch head and the
   numeric `return` lines are in missing lines of this extract;
   Pentium/Atom, the PPro-through-generic group, and Core2 form the
   visible case groups).  */
19440 /* Return the maximum number of instructions a cpu can issue. */
19443 ix86_issue_rate (void)
19447 case PROCESSOR_PENTIUM:
19448 case PROCESSOR_ATOM:
19452 case PROCESSOR_PENTIUMPRO:
19453 case PROCESSOR_PENTIUM4:
19454 case PROCESSOR_ATHLON:
19456 case PROCESSOR_AMDFAM10:
19457 case PROCESSOR_NOCONA:
19458 case PROCESSOR_GENERIC32:
19459 case PROCESSOR_GENERIC64:
19462 case PROCESSOR_CORE2:
19470 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19471 by DEP_INSN and nothing set by DEP_INSN. */
19474 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19478 /* Simplify the test for uninteresting insns. */
19479 if (insn_type != TYPE_SETCC
19480 && insn_type != TYPE_ICMOV
19481 && insn_type != TYPE_FCMOV
19482 && insn_type != TYPE_IBR)
19485 if ((set = single_set (dep_insn)) != 0)
19487 set = SET_DEST (set);
19490 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19491 && XVECLEN (PATTERN (dep_insn), 0) == 2
19492 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19493 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19495 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19496 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19501 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19504 /* This test is true if the dependent insn reads the flags but
19505 not any other potentially set register. */
19506 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19509 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* NOTE(review): sparse extract; comments only.  Scans USE_INSN's cached
   operands for the first MEM and reports whether its address is
   modified by SET_INSN -- i.e. whether an address-generation interlock
   exists between the two.  The `return false;` for memory-free insns
   is in a missing line.  Note only the FIRST MEM found is checked.  */
19515 /* Return true iff USE_INSN has a memory address with operands set by
19519 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19522 extract_insn_cached (use_insn);
19523 for (i = recog_data.n_operands - 1; i >= 0; --i)
19524 if (MEM_P (recog_data.operand[i]))
19526 rtx addr = XEXP (recog_data.operand[i], 0);
19527 return modified_in_p (addr, set_insn) != 0;
/* NOTE(review): sparse extract -- case breaks, cost assignments and the
   final return are in missing lines; comments only added.  Scheduler
   hook adjusting the latency COST of the LINK dependency between INSN
   and DEP_INSN per ix86_tune: anti/output dependencies are free;
   unrecognizable insns are left alone; Pentium models the AGI penalty
   and compare/jump pairing; PPro adds INT->FP conversion and FP-store
   latency and lets the reorder buffer hide load latency behind
   independent moves; the K6-era path (between the PPro and Athlon
   groups) treats push/pop esp dependencies as resolved early; the
   Athlon/AMDFAM10/Atom/generic group discounts load latency (integer
   vs. FP unit have different loadcost, 2 on Athlon per the ??? note).  */
19533 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19535 enum attr_type insn_type, dep_insn_type;
19536 enum attr_memory memory;
19538 int dep_insn_code_number;
19540 /* Anti and output dependencies have zero cost on all CPUs. */
19541 if (REG_NOTE_KIND (link) != 0)
19544 dep_insn_code_number = recog_memoized (dep_insn);
19546 /* If we can't recognize the insns, we can't really do anything. */
19547 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19550 insn_type = get_attr_type (insn);
19551 dep_insn_type = get_attr_type (dep_insn);
19555 case PROCESSOR_PENTIUM:
19556 /* Address Generation Interlock adds a cycle of latency. */
19557 if (insn_type == TYPE_LEA)
19559 rtx addr = PATTERN (insn);
19561 if (GET_CODE (addr) == PARALLEL)
19562 addr = XVECEXP (addr, 0, 0);
19564 gcc_assert (GET_CODE (addr) == SET);
19566 addr = SET_SRC (addr);
19567 if (modified_in_p (addr, dep_insn))
19570 else if (ix86_agi_dependent (dep_insn, insn))
19573 /* ??? Compares pair with jump/setcc. */
19574 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19577 /* Floating point stores require value to be ready one cycle earlier. */
19578 if (insn_type == TYPE_FMOV
19579 && get_attr_memory (insn) == MEMORY_STORE
19580 && !ix86_agi_dependent (dep_insn, insn))
19584 case PROCESSOR_PENTIUMPRO:
19585 memory = get_attr_memory (insn);
19587 /* INT->FP conversion is expensive. */
19588 if (get_attr_fp_int_src (dep_insn))
19591 /* There is one cycle extra latency between an FP op and a store. */
19592 if (insn_type == TYPE_FMOV
19593 && (set = single_set (dep_insn)) != NULL_RTX
19594 && (set2 = single_set (insn)) != NULL_RTX
19595 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19596 && MEM_P (SET_DEST (set2)))
19599 /* Show ability of reorder buffer to hide latency of load by executing
19600 in parallel with previous instruction in case
19601 previous instruction is not needed to compute the address. */
19602 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19603 && !ix86_agi_dependent (dep_insn, insn))
19605 /* Claim moves to take one cycle, as core can issue one load
19606 at time and the next load can start cycle later. */
19607 if (dep_insn_type == TYPE_IMOV
19608 || dep_insn_type == TYPE_FMOV)
19616 memory = get_attr_memory (insn);
19618 /* The esp dependency is resolved before the instruction is really
19620 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19621 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19624 /* INT->FP conversion is expensive. */
19625 if (get_attr_fp_int_src (dep_insn))
19628 /* Show ability of reorder buffer to hide latency of load by executing
19629 in parallel with previous instruction in case
19630 previous instruction is not needed to compute the address. */
19631 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19632 && !ix86_agi_dependent (dep_insn, insn))
19634 /* Claim moves to take one cycle, as core can issue one load
19635 at time and the next load can start cycle later. */
19636 if (dep_insn_type == TYPE_IMOV
19637 || dep_insn_type == TYPE_FMOV)
19646 case PROCESSOR_ATHLON:
19648 case PROCESSOR_AMDFAM10:
19649 case PROCESSOR_ATOM:
19650 case PROCESSOR_GENERIC32:
19651 case PROCESSOR_GENERIC64:
19652 memory = get_attr_memory (insn);
19654 /* Show ability of reorder buffer to hide latency of load by executing
19655 in parallel with previous instruction in case
19656 previous instruction is not needed to compute the address. */
19657 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19658 && !ix86_agi_dependent (dep_insn, insn))
19660 enum attr_unit unit = get_attr_unit (insn);
19663 /* Because of the difference between the length of integer and
19664 floating unit pipeline preparation stages, the memory operands
19665 for floating point are cheaper.
19667 ??? For Athlon it the difference is most probably 2. */
19668 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19671 loadcost = TARGET_ATHLON ? 2 : 0;
19673 if (cost >= loadcost)
/* NOTE(review): sparse extract; comments only.  Returns the per-tune
   DFA lookahead depth for the scheduler; the actual numeric returns and
   the default case fall in missing lines of this extract.  */
19686 /* How many alternative schedules to try. This should be as wide as the
19687 scheduling freedom in the DFA, but no wider. Making this value too
19688 large results extra work for the scheduler. */
19691 ia32_multipass_dfa_lookahead (void)
19695 case PROCESSOR_PENTIUM:
19698 case PROCESSOR_PENTIUMPRO:
/* NOTE(review): sparse extract; the numeric returns (64 / 128) for the
   numeric-constant branch are in missing lines; comments only added.
   Raises alignment for DFmode constants (to 64) and 128-bit-aligned
   modes (to 128), and word-aligns long string constants (>= 31 chars)
   unless optimizing for size; otherwise the caller's ALIGN stands.  */
19708 /* Compute the alignment given to a constant that is being placed in memory.
19709 EXP is the constant and ALIGN is the alignment that the object would
19711 The value of this function is used instead of that alignment to align
19715 ix86_constant_alignment (tree exp, int align)
19717 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19718 || TREE_CODE (exp) == INTEGER_CST)
19720 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19722 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19725 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19726 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19727 return BITS_PER_WORD;
/* NOTE(review): sparse extract -- the return statements and some
   conditions are in missing lines; comments only added.  Boosts static
   data alignment: large aggregates go up to max_align (word-size when
   optimizing for size, else up to 256 bits capped by
   MAX_OFILE_ALIGNMENT); x86-64 ABI aligns aggregates >= 128 bits to 16
   bytes; then per-type rules raise DFmode data to 64 and 128-bit modes
   to 128 for arrays, complex types, record/union first fields, and
   scalar real/vector/integer types.  */
19732 /* Compute the alignment for a static variable.
19733 TYPE is the data type, and ALIGN is the alignment that
19734 the object would ordinarily have. The value of this function is used
19735 instead of that alignment to align the object. */
19738 ix86_data_alignment (tree type, int align)
19740 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19742 if (AGGREGATE_TYPE_P (type)
19743 && TYPE_SIZE (type)
19744 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19745 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19746 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19747 && align < max_align)
19750 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19751 to 16byte boundary. */
19754 if (AGGREGATE_TYPE_P (type)
19755 && TYPE_SIZE (type)
19756 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19757 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19758 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19762 if (TREE_CODE (type) == ARRAY_TYPE)
19764 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19766 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19769 else if (TREE_CODE (type) == COMPLEX_TYPE)
19772 if (TYPE_MODE (type) == DCmode && align < 64)
19774 if ((TYPE_MODE (type) == XCmode
19775 || TYPE_MODE (type) == TCmode) && align < 128)
19778 else if ((TREE_CODE (type) == RECORD_TYPE
19779 || TREE_CODE (type) == UNION_TYPE
19780 || TREE_CODE (type) == QUAL_UNION_TYPE)
19781 && TYPE_FIELDS (type))
19783 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19785 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19788 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19789 || TREE_CODE (type) == INTEGER_TYPE)
19791 if (TYPE_MODE (type) == DFmode && align < 64)
19793 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* NOTE(review): sparse extract -- return statements and some guard
   conditions are in missing lines; comments only added.  Stack-slot
   analogue of ix86_data_alignment: avoids forcing dynamic realignment
   for DImode objects under -mpreferred-stack-boundary=2 (unless the
   user asked for the alignment); for TYPE == NULL caller-save slots,
   gives XFmode at least DFmode alignment; applies the x86-64 16-byte
   rule for aggregates >= 16 bytes; then the same per-type DFmode/128-bit
   promotions as the static-data routine.  */
19800 /* Compute the alignment for a local variable or a stack slot. EXP is
19801 the data type or decl itself, MODE is the widest mode available and
19802 ALIGN is the alignment that the object would ordinarily have. The
19803 value of this macro is used instead of that alignment to align the
19807 ix86_local_alignment (tree exp, enum machine_mode mode,
19808 unsigned int align)
19812 if (exp && DECL_P (exp))
19814 type = TREE_TYPE (exp);
19823 /* Don't do dynamic stack realignment for long long objects with
19824 -mpreferred-stack-boundary=2. */
19827 && ix86_preferred_stack_boundary < 64
19828 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19829 && (!type || !TYPE_USER_ALIGN (type))
19830 && (!decl || !DECL_USER_ALIGN (decl)))
19833 /* If TYPE is NULL, we are allocating a stack slot for caller-save
19834 register in MODE. We will return the largest alignment of XF
19838 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19839 align = GET_MODE_ALIGNMENT (DFmode);
19843 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19844 to 16byte boundary. */
19847 if (AGGREGATE_TYPE_P (type)
19848 && TYPE_SIZE (type)
19849 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19850 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19851 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19854 if (TREE_CODE (type) == ARRAY_TYPE)
19856 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19858 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19861 else if (TREE_CODE (type) == COMPLEX_TYPE)
19863 if (TYPE_MODE (type) == DCmode && align < 64)
19865 if ((TYPE_MODE (type) == XCmode
19866 || TYPE_MODE (type) == TCmode) && align < 128)
19869 else if ((TREE_CODE (type) == RECORD_TYPE
19870 || TREE_CODE (type) == UNION_TYPE
19871 || TREE_CODE (type) == QUAL_UNION_TYPE)
19872 && TYPE_FIELDS (type))
19874 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19876 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19879 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19880 || TREE_CODE (type) == INTEGER_TYPE)
19883 if (TYPE_MODE (type) == DFmode && align < 64)
19885 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* NOTE(review): sparse extract -- the TARGET_64BIT branch header,
   `offset` updates and block braces are in missing lines; comments only
   added.  Writes the variable bytes of a trampoline at TRAMP.
   32-bit: 0xb9 = mov ecx,imm32 (static chain), 0xe9 = jmp rel32 (DISP
   computed from the end of the jmp).  64-bit: movl (0xbb41, when the
   target fits a zero-extended imm32) or movabs r11 (0xbb49) with the
   function address, movabs r10 (0xba49) with the static chain, then
   0xff49/0xe3 = jmp *r11.  Ends by asserting the emitted size fits
   TRAMPOLINE_SIZE and, where enabled, calling __enable_execute_stack.  */
19891 /* Emit RTL insns to initialize the variable parts of a trampoline.
19892 FNADDR is an RTX for the address of the function's pure code.
19893 CXT is an RTX for the static chain value for the function. */
19895 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19899 /* Compute offset from the end of the jmp to the target function. */
19900 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19901 plus_constant (tramp, 10),
19902 NULL_RTX, 1, OPTAB_DIRECT);
19903 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19904 gen_int_mode (0xb9, QImode));
19905 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19906 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19907 gen_int_mode (0xe9, QImode));
19908 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19913 /* Try to load address using shorter movl instead of movabs.
19914 We may want to support movq for kernel mode, but kernel does not use
19915 trampolines at the moment. */
19916 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19918 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19919 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19920 gen_int_mode (0xbb41, HImode));
19921 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19922 gen_lowpart (SImode, fnaddr));
19927 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19928 gen_int_mode (0xbb49, HImode));
19929 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19933 /* Load static chain using movabs to r10. */
19934 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19935 gen_int_mode (0xba49, HImode));
19936 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19939 /* Jump to the r11 */
19940 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19941 gen_int_mode (0xff49, HImode));
19942 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19943 gen_int_mode (0xe3, QImode));
19945 gcc_assert (offset <= TRAMPOLINE_SIZE);
19948 #ifdef ENABLE_EXECUTE_STACK
19949 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19950 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19954 /* Codes for all the SSE/MMX builtins. */
19957 IX86_BUILTIN_ADDPS,
19958 IX86_BUILTIN_ADDSS,
19959 IX86_BUILTIN_DIVPS,
19960 IX86_BUILTIN_DIVSS,
19961 IX86_BUILTIN_MULPS,
19962 IX86_BUILTIN_MULSS,
19963 IX86_BUILTIN_SUBPS,
19964 IX86_BUILTIN_SUBSS,
19966 IX86_BUILTIN_CMPEQPS,
19967 IX86_BUILTIN_CMPLTPS,
19968 IX86_BUILTIN_CMPLEPS,
19969 IX86_BUILTIN_CMPGTPS,
19970 IX86_BUILTIN_CMPGEPS,
19971 IX86_BUILTIN_CMPNEQPS,
19972 IX86_BUILTIN_CMPNLTPS,
19973 IX86_BUILTIN_CMPNLEPS,
19974 IX86_BUILTIN_CMPNGTPS,
19975 IX86_BUILTIN_CMPNGEPS,
19976 IX86_BUILTIN_CMPORDPS,
19977 IX86_BUILTIN_CMPUNORDPS,
19978 IX86_BUILTIN_CMPEQSS,
19979 IX86_BUILTIN_CMPLTSS,
19980 IX86_BUILTIN_CMPLESS,
19981 IX86_BUILTIN_CMPNEQSS,
19982 IX86_BUILTIN_CMPNLTSS,
19983 IX86_BUILTIN_CMPNLESS,
19984 IX86_BUILTIN_CMPNGTSS,
19985 IX86_BUILTIN_CMPNGESS,
19986 IX86_BUILTIN_CMPORDSS,
19987 IX86_BUILTIN_CMPUNORDSS,
19989 IX86_BUILTIN_COMIEQSS,
19990 IX86_BUILTIN_COMILTSS,
19991 IX86_BUILTIN_COMILESS,
19992 IX86_BUILTIN_COMIGTSS,
19993 IX86_BUILTIN_COMIGESS,
19994 IX86_BUILTIN_COMINEQSS,
19995 IX86_BUILTIN_UCOMIEQSS,
19996 IX86_BUILTIN_UCOMILTSS,
19997 IX86_BUILTIN_UCOMILESS,
19998 IX86_BUILTIN_UCOMIGTSS,
19999 IX86_BUILTIN_UCOMIGESS,
20000 IX86_BUILTIN_UCOMINEQSS,
20002 IX86_BUILTIN_CVTPI2PS,
20003 IX86_BUILTIN_CVTPS2PI,
20004 IX86_BUILTIN_CVTSI2SS,
20005 IX86_BUILTIN_CVTSI642SS,
20006 IX86_BUILTIN_CVTSS2SI,
20007 IX86_BUILTIN_CVTSS2SI64,
20008 IX86_BUILTIN_CVTTPS2PI,
20009 IX86_BUILTIN_CVTTSS2SI,
20010 IX86_BUILTIN_CVTTSS2SI64,
20012 IX86_BUILTIN_MAXPS,
20013 IX86_BUILTIN_MAXSS,
20014 IX86_BUILTIN_MINPS,
20015 IX86_BUILTIN_MINSS,
20017 IX86_BUILTIN_LOADUPS,
20018 IX86_BUILTIN_STOREUPS,
20019 IX86_BUILTIN_MOVSS,
20021 IX86_BUILTIN_MOVHLPS,
20022 IX86_BUILTIN_MOVLHPS,
20023 IX86_BUILTIN_LOADHPS,
20024 IX86_BUILTIN_LOADLPS,
20025 IX86_BUILTIN_STOREHPS,
20026 IX86_BUILTIN_STORELPS,
20028 IX86_BUILTIN_MASKMOVQ,
20029 IX86_BUILTIN_MOVMSKPS,
20030 IX86_BUILTIN_PMOVMSKB,
20032 IX86_BUILTIN_MOVNTPS,
20033 IX86_BUILTIN_MOVNTQ,
20035 IX86_BUILTIN_LOADDQU,
20036 IX86_BUILTIN_STOREDQU,
20038 IX86_BUILTIN_PACKSSWB,
20039 IX86_BUILTIN_PACKSSDW,
20040 IX86_BUILTIN_PACKUSWB,
20042 IX86_BUILTIN_PADDB,
20043 IX86_BUILTIN_PADDW,
20044 IX86_BUILTIN_PADDD,
20045 IX86_BUILTIN_PADDQ,
20046 IX86_BUILTIN_PADDSB,
20047 IX86_BUILTIN_PADDSW,
20048 IX86_BUILTIN_PADDUSB,
20049 IX86_BUILTIN_PADDUSW,
20050 IX86_BUILTIN_PSUBB,
20051 IX86_BUILTIN_PSUBW,
20052 IX86_BUILTIN_PSUBD,
20053 IX86_BUILTIN_PSUBQ,
20054 IX86_BUILTIN_PSUBSB,
20055 IX86_BUILTIN_PSUBSW,
20056 IX86_BUILTIN_PSUBUSB,
20057 IX86_BUILTIN_PSUBUSW,
20060 IX86_BUILTIN_PANDN,
20064 IX86_BUILTIN_PAVGB,
20065 IX86_BUILTIN_PAVGW,
20067 IX86_BUILTIN_PCMPEQB,
20068 IX86_BUILTIN_PCMPEQW,
20069 IX86_BUILTIN_PCMPEQD,
20070 IX86_BUILTIN_PCMPGTB,
20071 IX86_BUILTIN_PCMPGTW,
20072 IX86_BUILTIN_PCMPGTD,
20074 IX86_BUILTIN_PMADDWD,
20076 IX86_BUILTIN_PMAXSW,
20077 IX86_BUILTIN_PMAXUB,
20078 IX86_BUILTIN_PMINSW,
20079 IX86_BUILTIN_PMINUB,
20081 IX86_BUILTIN_PMULHUW,
20082 IX86_BUILTIN_PMULHW,
20083 IX86_BUILTIN_PMULLW,
20085 IX86_BUILTIN_PSADBW,
20086 IX86_BUILTIN_PSHUFW,
20088 IX86_BUILTIN_PSLLW,
20089 IX86_BUILTIN_PSLLD,
20090 IX86_BUILTIN_PSLLQ,
20091 IX86_BUILTIN_PSRAW,
20092 IX86_BUILTIN_PSRAD,
20093 IX86_BUILTIN_PSRLW,
20094 IX86_BUILTIN_PSRLD,
20095 IX86_BUILTIN_PSRLQ,
20096 IX86_BUILTIN_PSLLWI,
20097 IX86_BUILTIN_PSLLDI,
20098 IX86_BUILTIN_PSLLQI,
20099 IX86_BUILTIN_PSRAWI,
20100 IX86_BUILTIN_PSRADI,
20101 IX86_BUILTIN_PSRLWI,
20102 IX86_BUILTIN_PSRLDI,
20103 IX86_BUILTIN_PSRLQI,
20105 IX86_BUILTIN_PUNPCKHBW,
20106 IX86_BUILTIN_PUNPCKHWD,
20107 IX86_BUILTIN_PUNPCKHDQ,
20108 IX86_BUILTIN_PUNPCKLBW,
20109 IX86_BUILTIN_PUNPCKLWD,
20110 IX86_BUILTIN_PUNPCKLDQ,
20112 IX86_BUILTIN_SHUFPS,
20114 IX86_BUILTIN_RCPPS,
20115 IX86_BUILTIN_RCPSS,
20116 IX86_BUILTIN_RSQRTPS,
20117 IX86_BUILTIN_RSQRTPS_NR,
20118 IX86_BUILTIN_RSQRTSS,
20119 IX86_BUILTIN_RSQRTF,
20120 IX86_BUILTIN_SQRTPS,
20121 IX86_BUILTIN_SQRTPS_NR,
20122 IX86_BUILTIN_SQRTSS,
20124 IX86_BUILTIN_UNPCKHPS,
20125 IX86_BUILTIN_UNPCKLPS,
20127 IX86_BUILTIN_ANDPS,
20128 IX86_BUILTIN_ANDNPS,
20130 IX86_BUILTIN_XORPS,
20133 IX86_BUILTIN_LDMXCSR,
20134 IX86_BUILTIN_STMXCSR,
20135 IX86_BUILTIN_SFENCE,
20137 /* 3DNow! Original */
20138 IX86_BUILTIN_FEMMS,
20139 IX86_BUILTIN_PAVGUSB,
20140 IX86_BUILTIN_PF2ID,
20141 IX86_BUILTIN_PFACC,
20142 IX86_BUILTIN_PFADD,
20143 IX86_BUILTIN_PFCMPEQ,
20144 IX86_BUILTIN_PFCMPGE,
20145 IX86_BUILTIN_PFCMPGT,
20146 IX86_BUILTIN_PFMAX,
20147 IX86_BUILTIN_PFMIN,
20148 IX86_BUILTIN_PFMUL,
20149 IX86_BUILTIN_PFRCP,
20150 IX86_BUILTIN_PFRCPIT1,
20151 IX86_BUILTIN_PFRCPIT2,
20152 IX86_BUILTIN_PFRSQIT1,
20153 IX86_BUILTIN_PFRSQRT,
20154 IX86_BUILTIN_PFSUB,
20155 IX86_BUILTIN_PFSUBR,
20156 IX86_BUILTIN_PI2FD,
20157 IX86_BUILTIN_PMULHRW,
20159 /* 3DNow! Athlon Extensions */
20160 IX86_BUILTIN_PF2IW,
20161 IX86_BUILTIN_PFNACC,
20162 IX86_BUILTIN_PFPNACC,
20163 IX86_BUILTIN_PI2FW,
20164 IX86_BUILTIN_PSWAPDSI,
20165 IX86_BUILTIN_PSWAPDSF,
20168 IX86_BUILTIN_ADDPD,
20169 IX86_BUILTIN_ADDSD,
20170 IX86_BUILTIN_DIVPD,
20171 IX86_BUILTIN_DIVSD,
20172 IX86_BUILTIN_MULPD,
20173 IX86_BUILTIN_MULSD,
20174 IX86_BUILTIN_SUBPD,
20175 IX86_BUILTIN_SUBSD,
20177 IX86_BUILTIN_CMPEQPD,
20178 IX86_BUILTIN_CMPLTPD,
20179 IX86_BUILTIN_CMPLEPD,
20180 IX86_BUILTIN_CMPGTPD,
20181 IX86_BUILTIN_CMPGEPD,
20182 IX86_BUILTIN_CMPNEQPD,
20183 IX86_BUILTIN_CMPNLTPD,
20184 IX86_BUILTIN_CMPNLEPD,
20185 IX86_BUILTIN_CMPNGTPD,
20186 IX86_BUILTIN_CMPNGEPD,
20187 IX86_BUILTIN_CMPORDPD,
20188 IX86_BUILTIN_CMPUNORDPD,
20189 IX86_BUILTIN_CMPEQSD,
20190 IX86_BUILTIN_CMPLTSD,
20191 IX86_BUILTIN_CMPLESD,
20192 IX86_BUILTIN_CMPNEQSD,
20193 IX86_BUILTIN_CMPNLTSD,
20194 IX86_BUILTIN_CMPNLESD,
20195 IX86_BUILTIN_CMPORDSD,
20196 IX86_BUILTIN_CMPUNORDSD,
20198 IX86_BUILTIN_COMIEQSD,
20199 IX86_BUILTIN_COMILTSD,
20200 IX86_BUILTIN_COMILESD,
20201 IX86_BUILTIN_COMIGTSD,
20202 IX86_BUILTIN_COMIGESD,
20203 IX86_BUILTIN_COMINEQSD,
20204 IX86_BUILTIN_UCOMIEQSD,
20205 IX86_BUILTIN_UCOMILTSD,
20206 IX86_BUILTIN_UCOMILESD,
20207 IX86_BUILTIN_UCOMIGTSD,
20208 IX86_BUILTIN_UCOMIGESD,
20209 IX86_BUILTIN_UCOMINEQSD,
20211 IX86_BUILTIN_MAXPD,
20212 IX86_BUILTIN_MAXSD,
20213 IX86_BUILTIN_MINPD,
20214 IX86_BUILTIN_MINSD,
20216 IX86_BUILTIN_ANDPD,
20217 IX86_BUILTIN_ANDNPD,
20219 IX86_BUILTIN_XORPD,
20221 IX86_BUILTIN_SQRTPD,
20222 IX86_BUILTIN_SQRTSD,
20224 IX86_BUILTIN_UNPCKHPD,
20225 IX86_BUILTIN_UNPCKLPD,
20227 IX86_BUILTIN_SHUFPD,
20229 IX86_BUILTIN_LOADUPD,
20230 IX86_BUILTIN_STOREUPD,
20231 IX86_BUILTIN_MOVSD,
20233 IX86_BUILTIN_LOADHPD,
20234 IX86_BUILTIN_LOADLPD,
20236 IX86_BUILTIN_CVTDQ2PD,
20237 IX86_BUILTIN_CVTDQ2PS,
20239 IX86_BUILTIN_CVTPD2DQ,
20240 IX86_BUILTIN_CVTPD2PI,
20241 IX86_BUILTIN_CVTPD2PS,
20242 IX86_BUILTIN_CVTTPD2DQ,
20243 IX86_BUILTIN_CVTTPD2PI,
20245 IX86_BUILTIN_CVTPI2PD,
20246 IX86_BUILTIN_CVTSI2SD,
20247 IX86_BUILTIN_CVTSI642SD,
20249 IX86_BUILTIN_CVTSD2SI,
20250 IX86_BUILTIN_CVTSD2SI64,
20251 IX86_BUILTIN_CVTSD2SS,
20252 IX86_BUILTIN_CVTSS2SD,
20253 IX86_BUILTIN_CVTTSD2SI,
20254 IX86_BUILTIN_CVTTSD2SI64,
20256 IX86_BUILTIN_CVTPS2DQ,
20257 IX86_BUILTIN_CVTPS2PD,
20258 IX86_BUILTIN_CVTTPS2DQ,
20260 IX86_BUILTIN_MOVNTI,
20261 IX86_BUILTIN_MOVNTPD,
20262 IX86_BUILTIN_MOVNTDQ,
20264 IX86_BUILTIN_MOVQ128,
20267 IX86_BUILTIN_MASKMOVDQU,
20268 IX86_BUILTIN_MOVMSKPD,
20269 IX86_BUILTIN_PMOVMSKB128,
20271 IX86_BUILTIN_PACKSSWB128,
20272 IX86_BUILTIN_PACKSSDW128,
20273 IX86_BUILTIN_PACKUSWB128,
20275 IX86_BUILTIN_PADDB128,
20276 IX86_BUILTIN_PADDW128,
20277 IX86_BUILTIN_PADDD128,
20278 IX86_BUILTIN_PADDQ128,
20279 IX86_BUILTIN_PADDSB128,
20280 IX86_BUILTIN_PADDSW128,
20281 IX86_BUILTIN_PADDUSB128,
20282 IX86_BUILTIN_PADDUSW128,
20283 IX86_BUILTIN_PSUBB128,
20284 IX86_BUILTIN_PSUBW128,
20285 IX86_BUILTIN_PSUBD128,
20286 IX86_BUILTIN_PSUBQ128,
20287 IX86_BUILTIN_PSUBSB128,
20288 IX86_BUILTIN_PSUBSW128,
20289 IX86_BUILTIN_PSUBUSB128,
20290 IX86_BUILTIN_PSUBUSW128,
20292 IX86_BUILTIN_PAND128,
20293 IX86_BUILTIN_PANDN128,
20294 IX86_BUILTIN_POR128,
20295 IX86_BUILTIN_PXOR128,
20297 IX86_BUILTIN_PAVGB128,
20298 IX86_BUILTIN_PAVGW128,
20300 IX86_BUILTIN_PCMPEQB128,
20301 IX86_BUILTIN_PCMPEQW128,
20302 IX86_BUILTIN_PCMPEQD128,
20303 IX86_BUILTIN_PCMPGTB128,
20304 IX86_BUILTIN_PCMPGTW128,
20305 IX86_BUILTIN_PCMPGTD128,
20307 IX86_BUILTIN_PMADDWD128,
20309 IX86_BUILTIN_PMAXSW128,
20310 IX86_BUILTIN_PMAXUB128,
20311 IX86_BUILTIN_PMINSW128,
20312 IX86_BUILTIN_PMINUB128,
20314 IX86_BUILTIN_PMULUDQ,
20315 IX86_BUILTIN_PMULUDQ128,
20316 IX86_BUILTIN_PMULHUW128,
20317 IX86_BUILTIN_PMULHW128,
20318 IX86_BUILTIN_PMULLW128,
20320 IX86_BUILTIN_PSADBW128,
20321 IX86_BUILTIN_PSHUFHW,
20322 IX86_BUILTIN_PSHUFLW,
20323 IX86_BUILTIN_PSHUFD,
20325 IX86_BUILTIN_PSLLDQI128,
20326 IX86_BUILTIN_PSLLWI128,
20327 IX86_BUILTIN_PSLLDI128,
20328 IX86_BUILTIN_PSLLQI128,
20329 IX86_BUILTIN_PSRAWI128,
20330 IX86_BUILTIN_PSRADI128,
20331 IX86_BUILTIN_PSRLDQI128,
20332 IX86_BUILTIN_PSRLWI128,
20333 IX86_BUILTIN_PSRLDI128,
20334 IX86_BUILTIN_PSRLQI128,
20336 IX86_BUILTIN_PSLLDQ128,
20337 IX86_BUILTIN_PSLLW128,
20338 IX86_BUILTIN_PSLLD128,
20339 IX86_BUILTIN_PSLLQ128,
20340 IX86_BUILTIN_PSRAW128,
20341 IX86_BUILTIN_PSRAD128,
20342 IX86_BUILTIN_PSRLW128,
20343 IX86_BUILTIN_PSRLD128,
20344 IX86_BUILTIN_PSRLQ128,
20346 IX86_BUILTIN_PUNPCKHBW128,
20347 IX86_BUILTIN_PUNPCKHWD128,
20348 IX86_BUILTIN_PUNPCKHDQ128,
20349 IX86_BUILTIN_PUNPCKHQDQ128,
20350 IX86_BUILTIN_PUNPCKLBW128,
20351 IX86_BUILTIN_PUNPCKLWD128,
20352 IX86_BUILTIN_PUNPCKLDQ128,
20353 IX86_BUILTIN_PUNPCKLQDQ128,
20355 IX86_BUILTIN_CLFLUSH,
20356 IX86_BUILTIN_MFENCE,
20357 IX86_BUILTIN_LFENCE,
20360 IX86_BUILTIN_ADDSUBPS,
20361 IX86_BUILTIN_HADDPS,
20362 IX86_BUILTIN_HSUBPS,
20363 IX86_BUILTIN_MOVSHDUP,
20364 IX86_BUILTIN_MOVSLDUP,
20365 IX86_BUILTIN_ADDSUBPD,
20366 IX86_BUILTIN_HADDPD,
20367 IX86_BUILTIN_HSUBPD,
20368 IX86_BUILTIN_LDDQU,
20370 IX86_BUILTIN_MONITOR,
20371 IX86_BUILTIN_MWAIT,
20374 IX86_BUILTIN_PHADDW,
20375 IX86_BUILTIN_PHADDD,
20376 IX86_BUILTIN_PHADDSW,
20377 IX86_BUILTIN_PHSUBW,
20378 IX86_BUILTIN_PHSUBD,
20379 IX86_BUILTIN_PHSUBSW,
20380 IX86_BUILTIN_PMADDUBSW,
20381 IX86_BUILTIN_PMULHRSW,
20382 IX86_BUILTIN_PSHUFB,
20383 IX86_BUILTIN_PSIGNB,
20384 IX86_BUILTIN_PSIGNW,
20385 IX86_BUILTIN_PSIGND,
20386 IX86_BUILTIN_PALIGNR,
20387 IX86_BUILTIN_PABSB,
20388 IX86_BUILTIN_PABSW,
20389 IX86_BUILTIN_PABSD,
20391 IX86_BUILTIN_PHADDW128,
20392 IX86_BUILTIN_PHADDD128,
20393 IX86_BUILTIN_PHADDSW128,
20394 IX86_BUILTIN_PHSUBW128,
20395 IX86_BUILTIN_PHSUBD128,
20396 IX86_BUILTIN_PHSUBSW128,
20397 IX86_BUILTIN_PMADDUBSW128,
20398 IX86_BUILTIN_PMULHRSW128,
20399 IX86_BUILTIN_PSHUFB128,
20400 IX86_BUILTIN_PSIGNB128,
20401 IX86_BUILTIN_PSIGNW128,
20402 IX86_BUILTIN_PSIGND128,
20403 IX86_BUILTIN_PALIGNR128,
20404 IX86_BUILTIN_PABSB128,
20405 IX86_BUILTIN_PABSW128,
20406 IX86_BUILTIN_PABSD128,
20408 /* AMDFAM10 - SSE4A New Instructions. */
20409 IX86_BUILTIN_MOVNTSD,
20410 IX86_BUILTIN_MOVNTSS,
20411 IX86_BUILTIN_EXTRQI,
20412 IX86_BUILTIN_EXTRQ,
20413 IX86_BUILTIN_INSERTQI,
20414 IX86_BUILTIN_INSERTQ,
20417 IX86_BUILTIN_BLENDPD,
20418 IX86_BUILTIN_BLENDPS,
20419 IX86_BUILTIN_BLENDVPD,
20420 IX86_BUILTIN_BLENDVPS,
20421 IX86_BUILTIN_PBLENDVB128,
20422 IX86_BUILTIN_PBLENDW128,
20427 IX86_BUILTIN_INSERTPS128,
20429 IX86_BUILTIN_MOVNTDQA,
20430 IX86_BUILTIN_MPSADBW128,
20431 IX86_BUILTIN_PACKUSDW128,
20432 IX86_BUILTIN_PCMPEQQ,
20433 IX86_BUILTIN_PHMINPOSUW128,
20435 IX86_BUILTIN_PMAXSB128,
20436 IX86_BUILTIN_PMAXSD128,
20437 IX86_BUILTIN_PMAXUD128,
20438 IX86_BUILTIN_PMAXUW128,
20440 IX86_BUILTIN_PMINSB128,
20441 IX86_BUILTIN_PMINSD128,
20442 IX86_BUILTIN_PMINUD128,
20443 IX86_BUILTIN_PMINUW128,
20445 IX86_BUILTIN_PMOVSXBW128,
20446 IX86_BUILTIN_PMOVSXBD128,
20447 IX86_BUILTIN_PMOVSXBQ128,
20448 IX86_BUILTIN_PMOVSXWD128,
20449 IX86_BUILTIN_PMOVSXWQ128,
20450 IX86_BUILTIN_PMOVSXDQ128,
20452 IX86_BUILTIN_PMOVZXBW128,
20453 IX86_BUILTIN_PMOVZXBD128,
20454 IX86_BUILTIN_PMOVZXBQ128,
20455 IX86_BUILTIN_PMOVZXWD128,
20456 IX86_BUILTIN_PMOVZXWQ128,
20457 IX86_BUILTIN_PMOVZXDQ128,
20459 IX86_BUILTIN_PMULDQ128,
20460 IX86_BUILTIN_PMULLD128,
20462 IX86_BUILTIN_ROUNDPD,
20463 IX86_BUILTIN_ROUNDPS,
20464 IX86_BUILTIN_ROUNDSD,
20465 IX86_BUILTIN_ROUNDSS,
20467 IX86_BUILTIN_PTESTZ,
20468 IX86_BUILTIN_PTESTC,
20469 IX86_BUILTIN_PTESTNZC,
20471 IX86_BUILTIN_VEC_INIT_V2SI,
20472 IX86_BUILTIN_VEC_INIT_V4HI,
20473 IX86_BUILTIN_VEC_INIT_V8QI,
20474 IX86_BUILTIN_VEC_EXT_V2DF,
20475 IX86_BUILTIN_VEC_EXT_V2DI,
20476 IX86_BUILTIN_VEC_EXT_V4SF,
20477 IX86_BUILTIN_VEC_EXT_V4SI,
20478 IX86_BUILTIN_VEC_EXT_V8HI,
20479 IX86_BUILTIN_VEC_EXT_V2SI,
20480 IX86_BUILTIN_VEC_EXT_V4HI,
20481 IX86_BUILTIN_VEC_EXT_V16QI,
20482 IX86_BUILTIN_VEC_SET_V2DI,
20483 IX86_BUILTIN_VEC_SET_V4SF,
20484 IX86_BUILTIN_VEC_SET_V4SI,
20485 IX86_BUILTIN_VEC_SET_V8HI,
20486 IX86_BUILTIN_VEC_SET_V4HI,
20487 IX86_BUILTIN_VEC_SET_V16QI,
20489 IX86_BUILTIN_VEC_PACK_SFIX,
20492 IX86_BUILTIN_CRC32QI,
20493 IX86_BUILTIN_CRC32HI,
20494 IX86_BUILTIN_CRC32SI,
20495 IX86_BUILTIN_CRC32DI,
20497 IX86_BUILTIN_PCMPESTRI128,
20498 IX86_BUILTIN_PCMPESTRM128,
20499 IX86_BUILTIN_PCMPESTRA128,
20500 IX86_BUILTIN_PCMPESTRC128,
20501 IX86_BUILTIN_PCMPESTRO128,
20502 IX86_BUILTIN_PCMPESTRS128,
20503 IX86_BUILTIN_PCMPESTRZ128,
20504 IX86_BUILTIN_PCMPISTRI128,
20505 IX86_BUILTIN_PCMPISTRM128,
20506 IX86_BUILTIN_PCMPISTRA128,
20507 IX86_BUILTIN_PCMPISTRC128,
20508 IX86_BUILTIN_PCMPISTRO128,
20509 IX86_BUILTIN_PCMPISTRS128,
20510 IX86_BUILTIN_PCMPISTRZ128,
20512 IX86_BUILTIN_PCMPGTQ,
20514 /* AES instructions */
20515 IX86_BUILTIN_AESENC128,
20516 IX86_BUILTIN_AESENCLAST128,
20517 IX86_BUILTIN_AESDEC128,
20518 IX86_BUILTIN_AESDECLAST128,
20519 IX86_BUILTIN_AESIMC128,
20520 IX86_BUILTIN_AESKEYGENASSIST128,
20522 /* PCLMUL instruction */
20523 IX86_BUILTIN_PCLMULQDQ128,
20526 IX86_BUILTIN_ADDPD256,
20527 IX86_BUILTIN_ADDPS256,
20528 IX86_BUILTIN_ADDSUBPD256,
20529 IX86_BUILTIN_ADDSUBPS256,
20530 IX86_BUILTIN_ANDPD256,
20531 IX86_BUILTIN_ANDPS256,
20532 IX86_BUILTIN_ANDNPD256,
20533 IX86_BUILTIN_ANDNPS256,
20534 IX86_BUILTIN_BLENDPD256,
20535 IX86_BUILTIN_BLENDPS256,
20536 IX86_BUILTIN_BLENDVPD256,
20537 IX86_BUILTIN_BLENDVPS256,
20538 IX86_BUILTIN_DIVPD256,
20539 IX86_BUILTIN_DIVPS256,
20540 IX86_BUILTIN_DPPS256,
20541 IX86_BUILTIN_HADDPD256,
20542 IX86_BUILTIN_HADDPS256,
20543 IX86_BUILTIN_HSUBPD256,
20544 IX86_BUILTIN_HSUBPS256,
20545 IX86_BUILTIN_MAXPD256,
20546 IX86_BUILTIN_MAXPS256,
20547 IX86_BUILTIN_MINPD256,
20548 IX86_BUILTIN_MINPS256,
20549 IX86_BUILTIN_MULPD256,
20550 IX86_BUILTIN_MULPS256,
20551 IX86_BUILTIN_ORPD256,
20552 IX86_BUILTIN_ORPS256,
20553 IX86_BUILTIN_SHUFPD256,
20554 IX86_BUILTIN_SHUFPS256,
20555 IX86_BUILTIN_SUBPD256,
20556 IX86_BUILTIN_SUBPS256,
20557 IX86_BUILTIN_XORPD256,
20558 IX86_BUILTIN_XORPS256,
20559 IX86_BUILTIN_CMPSD,
20560 IX86_BUILTIN_CMPSS,
20561 IX86_BUILTIN_CMPPD,
20562 IX86_BUILTIN_CMPPS,
20563 IX86_BUILTIN_CMPPD256,
20564 IX86_BUILTIN_CMPPS256,
20565 IX86_BUILTIN_CVTDQ2PD256,
20566 IX86_BUILTIN_CVTDQ2PS256,
20567 IX86_BUILTIN_CVTPD2PS256,
20568 IX86_BUILTIN_CVTPS2DQ256,
20569 IX86_BUILTIN_CVTPS2PD256,
20570 IX86_BUILTIN_CVTTPD2DQ256,
20571 IX86_BUILTIN_CVTPD2DQ256,
20572 IX86_BUILTIN_CVTTPS2DQ256,
20573 IX86_BUILTIN_EXTRACTF128PD256,
20574 IX86_BUILTIN_EXTRACTF128PS256,
20575 IX86_BUILTIN_EXTRACTF128SI256,
20576 IX86_BUILTIN_VZEROALL,
20577 IX86_BUILTIN_VZEROUPPER,
20578 IX86_BUILTIN_VZEROUPPER_REX64,
20579 IX86_BUILTIN_VPERMILVARPD,
20580 IX86_BUILTIN_VPERMILVARPS,
20581 IX86_BUILTIN_VPERMILVARPD256,
20582 IX86_BUILTIN_VPERMILVARPS256,
20583 IX86_BUILTIN_VPERMILPD,
20584 IX86_BUILTIN_VPERMILPS,
20585 IX86_BUILTIN_VPERMILPD256,
20586 IX86_BUILTIN_VPERMILPS256,
20587 IX86_BUILTIN_VPERM2F128PD256,
20588 IX86_BUILTIN_VPERM2F128PS256,
20589 IX86_BUILTIN_VPERM2F128SI256,
20590 IX86_BUILTIN_VBROADCASTSS,
20591 IX86_BUILTIN_VBROADCASTSD256,
20592 IX86_BUILTIN_VBROADCASTSS256,
20593 IX86_BUILTIN_VBROADCASTPD256,
20594 IX86_BUILTIN_VBROADCASTPS256,
20595 IX86_BUILTIN_VINSERTF128PD256,
20596 IX86_BUILTIN_VINSERTF128PS256,
20597 IX86_BUILTIN_VINSERTF128SI256,
20598 IX86_BUILTIN_LOADUPD256,
20599 IX86_BUILTIN_LOADUPS256,
20600 IX86_BUILTIN_STOREUPD256,
20601 IX86_BUILTIN_STOREUPS256,
20602 IX86_BUILTIN_LDDQU256,
20603 IX86_BUILTIN_MOVNTDQ256,
20604 IX86_BUILTIN_MOVNTPD256,
20605 IX86_BUILTIN_MOVNTPS256,
20606 IX86_BUILTIN_LOADDQU256,
20607 IX86_BUILTIN_STOREDQU256,
20608 IX86_BUILTIN_MASKLOADPD,
20609 IX86_BUILTIN_MASKLOADPS,
20610 IX86_BUILTIN_MASKSTOREPD,
20611 IX86_BUILTIN_MASKSTOREPS,
20612 IX86_BUILTIN_MASKLOADPD256,
20613 IX86_BUILTIN_MASKLOADPS256,
20614 IX86_BUILTIN_MASKSTOREPD256,
20615 IX86_BUILTIN_MASKSTOREPS256,
20616 IX86_BUILTIN_MOVSHDUP256,
20617 IX86_BUILTIN_MOVSLDUP256,
20618 IX86_BUILTIN_MOVDDUP256,
20620 IX86_BUILTIN_SQRTPD256,
20621 IX86_BUILTIN_SQRTPS256,
20622 IX86_BUILTIN_SQRTPS_NR256,
20623 IX86_BUILTIN_RSQRTPS256,
20624 IX86_BUILTIN_RSQRTPS_NR256,
20626 IX86_BUILTIN_RCPPS256,
20628 IX86_BUILTIN_ROUNDPD256,
20629 IX86_BUILTIN_ROUNDPS256,
20631 IX86_BUILTIN_UNPCKHPD256,
20632 IX86_BUILTIN_UNPCKLPD256,
20633 IX86_BUILTIN_UNPCKHPS256,
20634 IX86_BUILTIN_UNPCKLPS256,
20636 IX86_BUILTIN_SI256_SI,
20637 IX86_BUILTIN_PS256_PS,
20638 IX86_BUILTIN_PD256_PD,
20639 IX86_BUILTIN_SI_SI256,
20640 IX86_BUILTIN_PS_PS256,
20641 IX86_BUILTIN_PD_PD256,
20643 IX86_BUILTIN_VTESTZPD,
20644 IX86_BUILTIN_VTESTCPD,
20645 IX86_BUILTIN_VTESTNZCPD,
20646 IX86_BUILTIN_VTESTZPS,
20647 IX86_BUILTIN_VTESTCPS,
20648 IX86_BUILTIN_VTESTNZCPS,
20649 IX86_BUILTIN_VTESTZPD256,
20650 IX86_BUILTIN_VTESTCPD256,
20651 IX86_BUILTIN_VTESTNZCPD256,
20652 IX86_BUILTIN_VTESTZPS256,
20653 IX86_BUILTIN_VTESTCPS256,
20654 IX86_BUILTIN_VTESTNZCPS256,
20655 IX86_BUILTIN_PTESTZ256,
20656 IX86_BUILTIN_PTESTC256,
20657 IX86_BUILTIN_PTESTNZC256,
20659 IX86_BUILTIN_MOVMSKPD256,
20660 IX86_BUILTIN_MOVMSKPS256,
20662 /* TFmode support builtins. */
20664 IX86_BUILTIN_HUGE_VALQ,
20665 IX86_BUILTIN_FABSQ,
20666 IX86_BUILTIN_COPYSIGNQ,
20668 /* SSE5 instructions */
20669 IX86_BUILTIN_FMADDSS,
20670 IX86_BUILTIN_FMADDSD,
20671 IX86_BUILTIN_FMADDPS,
20672 IX86_BUILTIN_FMADDPD,
20673 IX86_BUILTIN_FMSUBSS,
20674 IX86_BUILTIN_FMSUBSD,
20675 IX86_BUILTIN_FMSUBPS,
20676 IX86_BUILTIN_FMSUBPD,
20677 IX86_BUILTIN_FNMADDSS,
20678 IX86_BUILTIN_FNMADDSD,
20679 IX86_BUILTIN_FNMADDPS,
20680 IX86_BUILTIN_FNMADDPD,
20681 IX86_BUILTIN_FNMSUBSS,
20682 IX86_BUILTIN_FNMSUBSD,
20683 IX86_BUILTIN_FNMSUBPS,
20684 IX86_BUILTIN_FNMSUBPD,
20685 IX86_BUILTIN_PCMOV,
20686 IX86_BUILTIN_PCMOV_V2DI,
20687 IX86_BUILTIN_PCMOV_V4SI,
20688 IX86_BUILTIN_PCMOV_V8HI,
20689 IX86_BUILTIN_PCMOV_V16QI,
20690 IX86_BUILTIN_PCMOV_V4SF,
20691 IX86_BUILTIN_PCMOV_V2DF,
20692 IX86_BUILTIN_PPERM,
20693 IX86_BUILTIN_PERMPS,
20694 IX86_BUILTIN_PERMPD,
20695 IX86_BUILTIN_PMACSSWW,
20696 IX86_BUILTIN_PMACSWW,
20697 IX86_BUILTIN_PMACSSWD,
20698 IX86_BUILTIN_PMACSWD,
20699 IX86_BUILTIN_PMACSSDD,
20700 IX86_BUILTIN_PMACSDD,
20701 IX86_BUILTIN_PMACSSDQL,
20702 IX86_BUILTIN_PMACSSDQH,
20703 IX86_BUILTIN_PMACSDQL,
20704 IX86_BUILTIN_PMACSDQH,
20705 IX86_BUILTIN_PMADCSSWD,
20706 IX86_BUILTIN_PMADCSWD,
20707 IX86_BUILTIN_PHADDBW,
20708 IX86_BUILTIN_PHADDBD,
20709 IX86_BUILTIN_PHADDBQ,
20710 IX86_BUILTIN_PHADDWD,
20711 IX86_BUILTIN_PHADDWQ,
20712 IX86_BUILTIN_PHADDDQ,
20713 IX86_BUILTIN_PHADDUBW,
20714 IX86_BUILTIN_PHADDUBD,
20715 IX86_BUILTIN_PHADDUBQ,
20716 IX86_BUILTIN_PHADDUWD,
20717 IX86_BUILTIN_PHADDUWQ,
20718 IX86_BUILTIN_PHADDUDQ,
20719 IX86_BUILTIN_PHSUBBW,
20720 IX86_BUILTIN_PHSUBWD,
20721 IX86_BUILTIN_PHSUBDQ,
20722 IX86_BUILTIN_PROTB,
20723 IX86_BUILTIN_PROTW,
20724 IX86_BUILTIN_PROTD,
20725 IX86_BUILTIN_PROTQ,
20726 IX86_BUILTIN_PROTB_IMM,
20727 IX86_BUILTIN_PROTW_IMM,
20728 IX86_BUILTIN_PROTD_IMM,
20729 IX86_BUILTIN_PROTQ_IMM,
20730 IX86_BUILTIN_PSHLB,
20731 IX86_BUILTIN_PSHLW,
20732 IX86_BUILTIN_PSHLD,
20733 IX86_BUILTIN_PSHLQ,
20734 IX86_BUILTIN_PSHAB,
20735 IX86_BUILTIN_PSHAW,
20736 IX86_BUILTIN_PSHAD,
20737 IX86_BUILTIN_PSHAQ,
20738 IX86_BUILTIN_FRCZSS,
20739 IX86_BUILTIN_FRCZSD,
20740 IX86_BUILTIN_FRCZPS,
20741 IX86_BUILTIN_FRCZPD,
20742 IX86_BUILTIN_CVTPH2PS,
20743 IX86_BUILTIN_CVTPS2PH,
20745 IX86_BUILTIN_COMEQSS,
20746 IX86_BUILTIN_COMNESS,
20747 IX86_BUILTIN_COMLTSS,
20748 IX86_BUILTIN_COMLESS,
20749 IX86_BUILTIN_COMGTSS,
20750 IX86_BUILTIN_COMGESS,
20751 IX86_BUILTIN_COMUEQSS,
20752 IX86_BUILTIN_COMUNESS,
20753 IX86_BUILTIN_COMULTSS,
20754 IX86_BUILTIN_COMULESS,
20755 IX86_BUILTIN_COMUGTSS,
20756 IX86_BUILTIN_COMUGESS,
20757 IX86_BUILTIN_COMORDSS,
20758 IX86_BUILTIN_COMUNORDSS,
20759 IX86_BUILTIN_COMFALSESS,
20760 IX86_BUILTIN_COMTRUESS,
20762 IX86_BUILTIN_COMEQSD,
20763 IX86_BUILTIN_COMNESD,
20764 IX86_BUILTIN_COMLTSD,
20765 IX86_BUILTIN_COMLESD,
20766 IX86_BUILTIN_COMGTSD,
20767 IX86_BUILTIN_COMGESD,
20768 IX86_BUILTIN_COMUEQSD,
20769 IX86_BUILTIN_COMUNESD,
20770 IX86_BUILTIN_COMULTSD,
20771 IX86_BUILTIN_COMULESD,
20772 IX86_BUILTIN_COMUGTSD,
20773 IX86_BUILTIN_COMUGESD,
20774 IX86_BUILTIN_COMORDSD,
20775 IX86_BUILTIN_COMUNORDSD,
20776 IX86_BUILTIN_COMFALSESD,
20777 IX86_BUILTIN_COMTRUESD,
20779 IX86_BUILTIN_COMEQPS,
20780 IX86_BUILTIN_COMNEPS,
20781 IX86_BUILTIN_COMLTPS,
20782 IX86_BUILTIN_COMLEPS,
20783 IX86_BUILTIN_COMGTPS,
20784 IX86_BUILTIN_COMGEPS,
20785 IX86_BUILTIN_COMUEQPS,
20786 IX86_BUILTIN_COMUNEPS,
20787 IX86_BUILTIN_COMULTPS,
20788 IX86_BUILTIN_COMULEPS,
20789 IX86_BUILTIN_COMUGTPS,
20790 IX86_BUILTIN_COMUGEPS,
20791 IX86_BUILTIN_COMORDPS,
20792 IX86_BUILTIN_COMUNORDPS,
20793 IX86_BUILTIN_COMFALSEPS,
20794 IX86_BUILTIN_COMTRUEPS,
20796 IX86_BUILTIN_COMEQPD,
20797 IX86_BUILTIN_COMNEPD,
20798 IX86_BUILTIN_COMLTPD,
20799 IX86_BUILTIN_COMLEPD,
20800 IX86_BUILTIN_COMGTPD,
20801 IX86_BUILTIN_COMGEPD,
20802 IX86_BUILTIN_COMUEQPD,
20803 IX86_BUILTIN_COMUNEPD,
20804 IX86_BUILTIN_COMULTPD,
20805 IX86_BUILTIN_COMULEPD,
20806 IX86_BUILTIN_COMUGTPD,
20807 IX86_BUILTIN_COMUGEPD,
20808 IX86_BUILTIN_COMORDPD,
20809 IX86_BUILTIN_COMUNORDPD,
20810 IX86_BUILTIN_COMFALSEPD,
20811 IX86_BUILTIN_COMTRUEPD,
20813 IX86_BUILTIN_PCOMEQUB,
20814 IX86_BUILTIN_PCOMNEUB,
20815 IX86_BUILTIN_PCOMLTUB,
20816 IX86_BUILTIN_PCOMLEUB,
20817 IX86_BUILTIN_PCOMGTUB,
20818 IX86_BUILTIN_PCOMGEUB,
20819 IX86_BUILTIN_PCOMFALSEUB,
20820 IX86_BUILTIN_PCOMTRUEUB,
20821 IX86_BUILTIN_PCOMEQUW,
20822 IX86_BUILTIN_PCOMNEUW,
20823 IX86_BUILTIN_PCOMLTUW,
20824 IX86_BUILTIN_PCOMLEUW,
20825 IX86_BUILTIN_PCOMGTUW,
20826 IX86_BUILTIN_PCOMGEUW,
20827 IX86_BUILTIN_PCOMFALSEUW,
20828 IX86_BUILTIN_PCOMTRUEUW,
20829 IX86_BUILTIN_PCOMEQUD,
20830 IX86_BUILTIN_PCOMNEUD,
20831 IX86_BUILTIN_PCOMLTUD,
20832 IX86_BUILTIN_PCOMLEUD,
20833 IX86_BUILTIN_PCOMGTUD,
20834 IX86_BUILTIN_PCOMGEUD,
20835 IX86_BUILTIN_PCOMFALSEUD,
20836 IX86_BUILTIN_PCOMTRUEUD,
20837 IX86_BUILTIN_PCOMEQUQ,
20838 IX86_BUILTIN_PCOMNEUQ,
20839 IX86_BUILTIN_PCOMLTUQ,
20840 IX86_BUILTIN_PCOMLEUQ,
20841 IX86_BUILTIN_PCOMGTUQ,
20842 IX86_BUILTIN_PCOMGEUQ,
20843 IX86_BUILTIN_PCOMFALSEUQ,
20844 IX86_BUILTIN_PCOMTRUEUQ,
20846 IX86_BUILTIN_PCOMEQB,
20847 IX86_BUILTIN_PCOMNEB,
20848 IX86_BUILTIN_PCOMLTB,
20849 IX86_BUILTIN_PCOMLEB,
20850 IX86_BUILTIN_PCOMGTB,
20851 IX86_BUILTIN_PCOMGEB,
20852 IX86_BUILTIN_PCOMFALSEB,
20853 IX86_BUILTIN_PCOMTRUEB,
20854 IX86_BUILTIN_PCOMEQW,
20855 IX86_BUILTIN_PCOMNEW,
20856 IX86_BUILTIN_PCOMLTW,
20857 IX86_BUILTIN_PCOMLEW,
20858 IX86_BUILTIN_PCOMGTW,
20859 IX86_BUILTIN_PCOMGEW,
20860 IX86_BUILTIN_PCOMFALSEW,
20861 IX86_BUILTIN_PCOMTRUEW,
20862 IX86_BUILTIN_PCOMEQD,
20863 IX86_BUILTIN_PCOMNED,
20864 IX86_BUILTIN_PCOMLTD,
20865 IX86_BUILTIN_PCOMLED,
20866 IX86_BUILTIN_PCOMGTD,
20867 IX86_BUILTIN_PCOMGED,
20868 IX86_BUILTIN_PCOMFALSED,
20869 IX86_BUILTIN_PCOMTRUED,
20870 IX86_BUILTIN_PCOMEQQ,
20871 IX86_BUILTIN_PCOMNEQ,
20872 IX86_BUILTIN_PCOMLTQ,
20873 IX86_BUILTIN_PCOMLEQ,
20874 IX86_BUILTIN_PCOMGTQ,
20875 IX86_BUILTIN_PCOMGEQ,
20876 IX86_BUILTIN_PCOMFALSEQ,
20877 IX86_BUILTIN_PCOMTRUEQ,
20882 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; holds the FUNCTION_DECL for each target
   builtin, or NULL_TREE when the builtin was deferred (see builtin_isa).  */
20883 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20885 /* Table of all of the builtin functions that are possible with different ISA's
20886 but are waiting to be built until a function is declared to use that
/* NOTE(review): this dump elides the tail of the comment above (original
   line 20887) and the struct's closing "};" (original line 20893).  */
20888 struct GTY(()) builtin_isa {
20889 tree type; /* builtin type to use in the declaration */
20890 const char *name; /* function name */
20891 int isa; /* isa_flags this builtin is defined for */
20892 bool const_p; /* true if the declaration is constant */
/* Per-builtin record of declarations deferred in def_builtin; consumed by
   ix86_add_new_builtins when the active ISA set grows.  */
20895 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20898 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
20899 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
20900 * function decl in the ix86_builtins array. Returns the function decl or
20901 * NULL_TREE, if the builtin was not added.
20903 * If the front end has a special hook for builtin functions, delay adding
20904 * builtin functions that aren't in the current ISA until the ISA is changed
20905 * with function specific optimization. Doing so, can save about 300K for the
20906 * default compiler. When the builtin is expanded, check at that time whether
20909 * If the front end doesn't have a special hook, record all builtins, even if
20910 * it isn't an instruction set in the current ISA in case the user uses
20911 * function specific options for a different ISA, so that we don't get scope
20912 * errors if a builtin is added in the middle of a function scope. */
/* NOTE(review): this dump elides several original lines of the function —
   the return type at 20914 (presumably "static inline tree"), brace lines,
   the "else" branch keyword, and the add_builtin_function argument
   continuation at 20929 — so the visible control flow is incomplete.
   Confirm against the full source before editing.  */
20915 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20917 tree decl = NULL_TREE;
/* Skip 64-bit-only builtins entirely when not targeting 64-bit.  */
20919 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* Record the ISA mask regardless of whether we declare now or defer.  */
20921 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately when the builtin's ISA is enabled, or when the
   front end lacks a separate extended-scope hook (in which case deferred
   registration is not possible and everything must be declared now).  */
20923 if ((mask & ix86_isa_flags) != 0
20924 || (lang_hooks.builtin_function
20925 == lang_hooks.builtin_function_ext_scope))
20928 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20930 ix86_builtins[(int) code] = decl;
/* type == NULL_TREE marks the isa entry as "already declared".  */
20931 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Deferred path: stash everything needed so ix86_add_new_builtins can
   declare the builtin later if its ISA becomes enabled.  */
20935 ix86_builtins[(int) code] = NULL_TREE;
20936 ix86_builtins_isa[(int) code].const_p = false;
20937 ix86_builtins_isa[(int) code].type = type;
20938 ix86_builtins_isa[(int) code].name = name;
20945 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the dump elides the return type line, braces, and the
   if/else lines between the statements below — in the original, the
   TREE_READONLY store applies when DECL is non-null and the const_p store
   is the deferred-declaration alternative.  Confirm against full source.  */
20948 def_builtin_const (int mask, const char *name, tree type,
20949 enum ix86_builtins code)
20951 tree decl = def_builtin (mask, name, type, code);
/* Immediate declaration: mark it pure/const via TREE_READONLY.  */
20953 TREE_READONLY (decl) = 1;
/* Deferred declaration: remember to mark it const when created later.  */
20955 ix86_builtins_isa[(int) code].const_p = true;
20960 /* Add any new builtin functions for a given ISA that may not have been
20961 declared. This saves a bit of space compared to adding all of the
20962 declarations to the tree, even if we didn't use them. */
/* NOTE(review): return type (original line 20964), local declarations of
   I and DECL, and brace lines are elided from this dump.  */
20965 ix86_add_new_builtins (int isa)
/* Walk every builtin slot; declare those deferred by def_builtin whose
   ISA is now enabled (type != NULL_TREE means "not yet declared").  */
20970 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20972 if ((ix86_builtins_isa[i].isa & isa) != 0
20973 && ix86_builtins_isa[i].type != NULL_TREE)
20975 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20976 ix86_builtins_isa[i].type,
20977 i, BUILT_IN_MD, NULL,
20980 ix86_builtins[i] = decl;
/* Clear type so the slot is not declared a second time.  */
20981 ix86_builtins_isa[i].type = NULL_TREE;
/* Apply the "const" marking that def_builtin_const deferred.  */
20982 if (ix86_builtins_isa[i].const_p)
20983 TREE_READONLY (decl) = 1;
20988 /* Bits for builtin_description.flag. */
20990 /* Set when we don't support the comparison natively, and should
20991 swap_comparison in order to support it. */
20992 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below: ISA gate, insn
   pattern, user-visible name, builtin code, and comparison code.  */
/* NOTE(review): the dump elides the struct's final member (the "flag"
   field at original line 21001) and the closing "};".  */
20994 struct builtin_description
20996 const unsigned int mask;
20997 const enum insn_code icode;
20998 const char *const name;
20999 const enum ix86_builtins code;
21000 const enum rtx_code comparison;
/* Descriptor table for the SSE/SSE2 scalar ordered (comi*) and unordered
   (ucomi*) comparison builtins.  The rtx comparison codes use the
   unordered variants (UNEQ/UNLT/UNLE/LTGT) where the instruction does not
   trap on QNaN.  NOTE(review): the closing "};" is elided in this dump.  */
21004 static const struct builtin_description bdesc_comi[] =
21006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21014 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21015 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21016 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21017 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21026 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21027 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21028 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21029 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 PCMPESTR* (explicit-length string compare) builtins.  For the
   flag-reading variants the "comparison" slot carries the CC mode to test
   (cast to int); the index/mask variants use 0.  NOTE(review): the
   closing "};" is elided in this dump.  */
21032 static const struct builtin_description bdesc_pcmpestr[] =
21035 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21036 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21037 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21038 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 PCMPISTR* (implicit-length string compare) builtins; same row
   layout and CC-mode convention as bdesc_pcmpestr above.  NOTE(review):
   the closing "};" is elided in this dump.  */
21044 static const struct builtin_description bdesc_pcmpistr[] =
21047 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21048 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21050 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21056 /* Special builtin types */
/* Function-type signatures for "special" builtins (loads/stores and other
   builtins with pointer or void operands).  Naming convention:
   RET_FTYPE_ARG1_ARG2...; PC* = pointer-to-const, P* = pointer.
   NOTE(review): the enum's closing "};" is elided in this dump.  */
21057 enum ix86_special_builtin_type
21059 SPECIAL_FTYPE_UNKNOWN,
21061 V32QI_FTYPE_PCCHAR,
21062 V16QI_FTYPE_PCCHAR,
21064 V8SF_FTYPE_PCFLOAT,
21066 V4DF_FTYPE_PCDOUBLE,
21067 V4SF_FTYPE_PCFLOAT,
21068 V2DF_FTYPE_PCDOUBLE,
21069 V8SF_FTYPE_PCV8SF_V8SF,
21070 V4DF_FTYPE_PCV4DF_V4DF,
21071 V4SF_FTYPE_V4SF_PCV2SF,
21072 V4SF_FTYPE_PCV4SF_V4SF,
21073 V2DF_FTYPE_V2DF_PCDOUBLE,
21074 V2DF_FTYPE_PCV2DF_V2DF,
21076 VOID_FTYPE_PV2SF_V4SF,
21077 VOID_FTYPE_PV4DI_V4DI,
21078 VOID_FTYPE_PV2DI_V2DI,
21079 VOID_FTYPE_PCHAR_V32QI,
21080 VOID_FTYPE_PCHAR_V16QI,
21081 VOID_FTYPE_PFLOAT_V8SF,
21082 VOID_FTYPE_PFLOAT_V4SF,
21083 VOID_FTYPE_PDOUBLE_V4DF,
21084 VOID_FTYPE_PDOUBLE_V2DF,
21086 VOID_FTYPE_PINT_INT,
21087 VOID_FTYPE_PV8SF_V8SF_V8SF,
21088 VOID_FTYPE_PV4DF_V4DF_V4DF,
21089 VOID_FTYPE_PV4SF_V4SF_V4SF,
21090 VOID_FTYPE_PV2DF_V2DF_V2DF
21093 /* Builtin types */
/* Function signatures for the ordinary (non-memory) builtins registered
   from bdesc_args.  Same RET_FTYPE_ARGS naming convention as
   ix86_special_builtin_type above.  Suffix conventions observed in the
   table below: _COUNT marks shift builtins whose last operand is a
   shift count, _SWAP marks compare builtins whose operands are swapped
   before emission (cmpgt/cmpge variants), _VEC_MERGE marks scalar ops
   that merge into the destination vector, _PTEST marks ptest-style
   flag-setting comparisons.  NOTE(review): the enum braces and a large
   run of enumerators are not part of the visible excerpt.  */
21094 enum ix86_builtin_type
21097   FLOAT128_FTYPE_FLOAT128,
21099   FLOAT128_FTYPE_FLOAT128_FLOAT128,
/* ptest-style predicates: two vectors in, condition-code int out.  */
21100   INT_FTYPE_V8SF_V8SF_PTEST,
21101   INT_FTYPE_V4DI_V4DI_PTEST,
21102   INT_FTYPE_V4DF_V4DF_PTEST,
21103   INT_FTYPE_V4SF_V4SF_PTEST,
21104   INT_FTYPE_V2DI_V2DI_PTEST,
21105   INT_FTYPE_V2DF_V2DF_PTEST,
21137   V4SF_FTYPE_V4SF_VEC_MERGE,
21146   V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures.  */
21157   V16QI_FTYPE_V16QI_V16QI,
21158   V16QI_FTYPE_V8HI_V8HI,
21159   V8QI_FTYPE_V8QI_V8QI,
21160   V8QI_FTYPE_V4HI_V4HI,
21161   V8HI_FTYPE_V8HI_V8HI,
21162   V8HI_FTYPE_V8HI_V8HI_COUNT,
21163   V8HI_FTYPE_V16QI_V16QI,
21164   V8HI_FTYPE_V4SI_V4SI,
21165   V8HI_FTYPE_V8HI_SI_COUNT,
21166   V8SF_FTYPE_V8SF_V8SF,
21167   V8SF_FTYPE_V8SF_V8SI,
21168   V4SI_FTYPE_V4SI_V4SI,
21169   V4SI_FTYPE_V4SI_V4SI_COUNT,
21170   V4SI_FTYPE_V8HI_V8HI,
21171   V4SI_FTYPE_V4SF_V4SF,
21172   V4SI_FTYPE_V2DF_V2DF,
21173   V4SI_FTYPE_V4SI_SI_COUNT,
21174   V4HI_FTYPE_V4HI_V4HI,
21175   V4HI_FTYPE_V4HI_V4HI_COUNT,
21176   V4HI_FTYPE_V8QI_V8QI,
21177   V4HI_FTYPE_V2SI_V2SI,
21178   V4HI_FTYPE_V4HI_SI_COUNT,
21179   V4DF_FTYPE_V4DF_V4DF,
21180   V4DF_FTYPE_V4DF_V4DI,
21181   V4SF_FTYPE_V4SF_V4SF,
21182   V4SF_FTYPE_V4SF_V4SF_SWAP,
21183   V4SF_FTYPE_V4SF_V4SI,
21184   V4SF_FTYPE_V4SF_V2SI,
21185   V4SF_FTYPE_V4SF_V2DF,
21186   V4SF_FTYPE_V4SF_DI,
21187   V4SF_FTYPE_V4SF_SI,
21188   V2DI_FTYPE_V2DI_V2DI,
21189   V2DI_FTYPE_V2DI_V2DI_COUNT,
21190   V2DI_FTYPE_V16QI_V16QI,
21191   V2DI_FTYPE_V4SI_V4SI,
21192   V2DI_FTYPE_V2DI_V16QI,
21193   V2DI_FTYPE_V2DF_V2DF,
21194   V2DI_FTYPE_V2DI_SI_COUNT,
21195   V2SI_FTYPE_V2SI_V2SI,
21196   V2SI_FTYPE_V2SI_V2SI_COUNT,
21197   V2SI_FTYPE_V4HI_V4HI,
21198   V2SI_FTYPE_V2SF_V2SF,
21199   V2SI_FTYPE_V2SI_SI_COUNT,
21200   V2DF_FTYPE_V2DF_V2DF,
21201   V2DF_FTYPE_V2DF_V2DF_SWAP,
21202   V2DF_FTYPE_V2DF_V4SF,
21203   V2DF_FTYPE_V2DF_V2DI,
21204   V2DF_FTYPE_V2DF_DI,
21205   V2DF_FTYPE_V2DF_SI,
21206   V2SF_FTYPE_V2SF_V2SF,
21207   V1DI_FTYPE_V1DI_V1DI,
21208   V1DI_FTYPE_V1DI_V1DI_COUNT,
21209   V1DI_FTYPE_V8QI_V8QI,
21210   V1DI_FTYPE_V2SI_V2SI,
21211   V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar integer signatures (e.g. crc32-style builtins).  */
21212   UINT64_FTYPE_UINT64_UINT64,
21213   UINT_FTYPE_UINT_UINT,
21214   UINT_FTYPE_UINT_USHORT,
21215   UINT_FTYPE_UINT_UCHAR,
/* Vector + immediate-int signatures (shuffles, shifts by immediate).  */
21216   V8HI_FTYPE_V8HI_INT,
21217   V4SI_FTYPE_V4SI_INT,
21218   V4HI_FTYPE_V4HI_INT,
21219   V8SF_FTYPE_V8SF_INT,
21220   V4SI_FTYPE_V8SI_INT,
21221   V4SF_FTYPE_V8SF_INT,
21222   V2DF_FTYPE_V4DF_INT,
21223   V4DF_FTYPE_V4DF_INT,
21224   V4SF_FTYPE_V4SF_INT,
21225   V2DI_FTYPE_V2DI_INT,
21226   V2DI2TI_FTYPE_V2DI_INT,
21227   V2DF_FTYPE_V2DF_INT,
/* Three-operand vector signatures (blends, fused selects).  */
21228   V16QI_FTYPE_V16QI_V16QI_V16QI,
21229   V8SF_FTYPE_V8SF_V8SF_V8SF,
21230   V4DF_FTYPE_V4DF_V4DF_V4DF,
21231   V4SF_FTYPE_V4SF_V4SF_V4SF,
21232   V2DF_FTYPE_V2DF_V2DF_V2DF,
/* Two vectors + immediate-int signatures.  */
21233   V16QI_FTYPE_V16QI_V16QI_INT,
21234   V8SI_FTYPE_V8SI_V8SI_INT,
21235   V8SI_FTYPE_V8SI_V4SI_INT,
21236   V8HI_FTYPE_V8HI_V8HI_INT,
21237   V8SF_FTYPE_V8SF_V8SF_INT,
21238   V8SF_FTYPE_V8SF_V4SF_INT,
21239   V4SI_FTYPE_V4SI_V4SI_INT,
21240   V4DF_FTYPE_V4DF_V4DF_INT,
21241   V4DF_FTYPE_V4DF_V2DF_INT,
21242   V4SF_FTYPE_V4SF_V4SF_INT,
21243   V2DI_FTYPE_V2DI_V2DI_INT,
21244   V2DI2TI_FTYPE_V2DI_V2DI_INT,
21245   V1DI2DI_FTYPE_V1DI_V1DI_INT,
21246   V2DF_FTYPE_V2DF_V2DF_INT,
21247   V2DI_FTYPE_V2DI_UINT_UINT,
21248   V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21251 /* Special builtins with variable number of arguments.  */
/* Each entry is a struct builtin_description initializer; from the
   initializers the field order is: ISA option mask gating the builtin,
   insn code (CODE_FOR_*), builtin name string, IX86_BUILTIN_* enum,
   comparison code (UNKNOWN for non-compares), and the
   ix86_special_builtin_type signature cast to int.  Exact field names
   live in the struct declaration outside this excerpt -- confirm there.
   NOTE(review): the closing brace of this array is not part of the
   visible excerpt.  */
21252 static const struct builtin_description bdesc_special_args[] =
/* MMX.  */
21255   { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* 3DNow!  */
21258   { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
/* SSE: unaligned and non-temporal moves, partial loads/stores.  */
21261   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21262   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21263   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21265   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21266   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21267   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21268   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21270   /* SSE or 3DNow!A */
21271   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21272   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
/* SSE2: fences, unaligned/non-temporal moves, partial loads.  */
21275   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
/* NOTE(review): null name below -- presumably this builtin is
   registered under a name elsewhere in the file; confirm.  */
21276   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21277   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21278   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21279   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21280   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21281   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21282   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21283   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21285   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21286   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3.  */
21289   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
/* SSE4.1.  */
21292   { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
/* SSE4A.  */
21295   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21296   { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
/* AVX: zeroing, broadcasts, 256-bit moves, masked loads/stores.  */
21299   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21300   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21301   { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21303   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21304   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21305   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21306   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21307   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21309   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21310   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21311   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21312   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21313   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21314   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21315   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21317   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21318   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21319   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21321   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21322   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21323   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21324   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21325   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21326   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21327   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21328   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21331 /* Builtins with variable number of arguments. */
21332 static const struct builtin_description bdesc_args[] =
21335 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21336 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21337 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21338 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21339 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21340 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21348 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21357 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21360 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21384 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21387 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21399 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21400 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21401 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21402 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21404 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21405 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21406 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21407 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21408 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21409 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21410 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21411 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21412 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21413 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21414 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21415 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21416 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21417 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21418 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21421 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21422 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21423 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21424 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21425 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21426 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21429 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21430 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21431 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21432 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21433 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21434 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21435 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21436 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21437 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21438 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21439 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21440 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21442 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21444 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21445 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21446 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21447 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21448 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21449 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21450 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21451 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21453 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21454 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21455 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21456 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21457 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21458 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21459 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21484 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21494 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21496 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21502   /* SSE MMX or 3DNow!A */
21503 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21504 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21505 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21507 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21508 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21509 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21510 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21512 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21515 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21526 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21529 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21530 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21536 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21537 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21540 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21541 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21543 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21544 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21545 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21546 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21547 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21548 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21549 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21550 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21574 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21578 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21580 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21581 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21587 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21589 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21590 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21591 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21592 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21593 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21594 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21595 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21596 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21610 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21613 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21619 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21621 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21625 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21626 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21627 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21630 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21631 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21632 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21633 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21634 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21643 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21652 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21657 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21658 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21659 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21660 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21661 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21662 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21665 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21666 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21667 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21668 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21669 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21670 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21672 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21673 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21674 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21675 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21677 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21681 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21683 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21684 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
   /* SSE2 MMX */
21689 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21690 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
   /* SSE3 */
21693 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21694 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21696 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21697 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21698 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21699 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21700 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21701 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
   /* SSSE3 */
21704 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21705 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21712 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21737 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21738 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
   /* SSE4.1 */
21741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21753 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21767 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21779 /* SSE4.1 and SSE5 */
21780 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21781 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21782 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21783 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21785 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21786 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21787 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
   /* SSE4.2 */
21790 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21791 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21794 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21797 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21798 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21799 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21800 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES.  The name field is 0 here: these builtins are presumably
   declared elsewhere (under the AES ISA mask) and the table entries
   serve only for expansion -- TODO confirm against the init code.
   The SSE2 mask reflects the operand modes, not the AES ISA gate.  */
21803 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21804 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21808 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* PCLMUL (carry-less multiply); name 0 as for the AES entries.  */
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* AVX (256-bit) builtins.  Same entry layout as above; the prototype
   tags (V4DF/V8SF/V8SI...) give the 256-bit operand shapes.  */
21815 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21816 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21819 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21820 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21823 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* Horizontal add/sub: note the interleaved ordering (haddpd, hsubps,
   hsubpd, haddps) -- each name still pairs with its matching insn.  */
21825 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21826 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21828 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21829 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21830 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21831 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21832 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21833 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21834 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21835 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21836 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21837 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21838 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21839 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21840 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* Variable permutes: second operand is an integer control vector.  */
21842 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
/* Immediate-controlled blends/shuffles: trailing INT is the imm8.  */
21847 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21850 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21851 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21852 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21853 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* VEX-encoded 128-bit compares with an imm8 predicate operand.  */
21854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21858 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
/* 128-bit lane extract/insert; the INT selects the lane.  */
21860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21862 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21863 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21864 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21865 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
/* sqrt/rsqrt/rcp: the _nr ("no reciprocal refinement"? -- TODO
   confirm naming intent) variants expand via the generic patterns.  */
21886 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21887 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21888 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21890 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21894 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21895 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21897 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21898 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21899 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
/* 128<->256-bit cast helpers (types show widening/narrowing only).  */
21902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21904 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
/* vtest/ptest families: one insn pattern each, with the RTX code
   (EQ / LTU / GTU) selecting the Z, C or NZC result, as for ptest
   earlier in this table.  */
21909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21921 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21922 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21926 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
/* Classification tags for the SSE5 multi-operand builtins below.
   Names encode <arg count>_<element mode>[_<variant>]:
     _IMM = second operand is an immediate;
     _CMP = comparison form (entry's RTX code supplies the predicate).
   NOTE(review): this listing elides some enumerators (the embedded
   source line numbers jump 21930->21940 and 21941->21948); do not
   treat the visible list as exhaustive.  */
21930 enum multi_arg_type {
21940   MULTI_ARG_3_PERMPS,
21941   MULTI_ARG_3_PERMPD,
21948   MULTI_ARG_2_DI_IMM,
21949   MULTI_ARG_2_SI_IMM,
21950   MULTI_ARG_2_HI_IMM,
21951   MULTI_ARG_2_QI_IMM,
21952   MULTI_ARG_2_SF_CMP,
21953   MULTI_ARG_2_DF_CMP,
21954   MULTI_ARG_2_DI_CMP,
21955   MULTI_ARG_2_SI_CMP,
21956   MULTI_ARG_2_HI_CMP,
21957   MULTI_ARG_2_QI_CMP,
/* SSE5 multi-operand builtins.  Same entry layout as the earlier
   tables, except the final field is a (cast) multi_arg_type tag and,
   for the COM* compare entries, the fifth field carries the RTX
   comparison code to emit.
   NOTE(review): the opening brace of the initializer (original line
   21981) is missing from this listing.  */
21980 static const struct builtin_description bdesc_multi_arg[] =
21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
/* pcmov: plain "__builtin_ia32_pcmov" shares the V2DI pattern with
   the _v2di variant (two names, one expansion).  */
21998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
22007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
/* Integer multiply-accumulate family.  */
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
22016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
22017 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
22018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
22019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
/* Rotates (variable then immediate forms) and shifts.  */
22020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
22021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
22022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
22023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
22024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
22025 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
22026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
22027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
22028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
22029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
22030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
22031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
22032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
22033 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
22034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
22035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
22036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
22037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
22038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
22039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
22040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
22041 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
/* Horizontal add/sub with widening result element (tag X_Y = source
   mode X, result mode Y); "u" variants name the unsigned insns.  */
22042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
22043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
22044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
22045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
22046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
22047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
22048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
22049 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
22050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
22051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
22052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
22053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
22054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
22055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
22056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
/* COM* compares: one insn pattern per mode; the RTX code field
   supplies the predicate.  Several spellings map to the same
   IX86_BUILTIN_* enum (e.g. comneqss and comness both use
   IX86_BUILTIN_COMNESS) -- apparently deliberate name aliases;
   confirm before deduplicating.  */
22058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
/* Packed (non-scalar) compare variants of the same family.  */
22092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22113 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22121 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22129 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22137 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22145 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22153 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22161 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22170 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22179 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22188 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22190 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22191 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22192 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22193 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22194 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22195 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22196 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22197 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22199 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22200 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22201 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22202 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22203 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22204 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22205 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22206 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22208 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22209 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22210 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22211 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22212 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22213 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22214 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22215 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22218 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22219 in the current target ISA to allow the user to compile particular modules
22220 with different target specific options that differ from the command line
22223 ix86_init_mmx_sse_builtins (void)
22225 const struct builtin_description * d;
22228 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22229 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22230 tree V1DI_type_node
22231 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22232 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22233 tree V2DI_type_node
22234 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22235 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22236 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22237 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22238 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22239 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22240 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22242 tree pchar_type_node = build_pointer_type (char_type_node);
22243 tree pcchar_type_node
22244 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22245 tree pfloat_type_node = build_pointer_type (float_type_node);
22246 tree pcfloat_type_node
22247 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22248 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22249 tree pcv2sf_type_node
22250 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22251 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22252 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22255 tree int_ftype_v4sf_v4sf
22256 = build_function_type_list (integer_type_node,
22257 V4SF_type_node, V4SF_type_node, NULL_TREE);
22258 tree v4si_ftype_v4sf_v4sf
22259 = build_function_type_list (V4SI_type_node,
22260 V4SF_type_node, V4SF_type_node, NULL_TREE);
22261 /* MMX/SSE/integer conversions. */
22262 tree int_ftype_v4sf
22263 = build_function_type_list (integer_type_node,
22264 V4SF_type_node, NULL_TREE);
22265 tree int64_ftype_v4sf
22266 = build_function_type_list (long_long_integer_type_node,
22267 V4SF_type_node, NULL_TREE);
22268 tree int_ftype_v8qi
22269 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22270 tree v4sf_ftype_v4sf_int
22271 = build_function_type_list (V4SF_type_node,
22272 V4SF_type_node, integer_type_node, NULL_TREE);
22273 tree v4sf_ftype_v4sf_int64
22274 = build_function_type_list (V4SF_type_node,
22275 V4SF_type_node, long_long_integer_type_node,
22277 tree v4sf_ftype_v4sf_v2si
22278 = build_function_type_list (V4SF_type_node,
22279 V4SF_type_node, V2SI_type_node, NULL_TREE);
22281 /* Miscellaneous. */
22282 tree v8qi_ftype_v4hi_v4hi
22283 = build_function_type_list (V8QI_type_node,
22284 V4HI_type_node, V4HI_type_node, NULL_TREE);
22285 tree v4hi_ftype_v2si_v2si
22286 = build_function_type_list (V4HI_type_node,
22287 V2SI_type_node, V2SI_type_node, NULL_TREE);
22288 tree v4sf_ftype_v4sf_v4sf_int
22289 = build_function_type_list (V4SF_type_node,
22290 V4SF_type_node, V4SF_type_node,
22291 integer_type_node, NULL_TREE);
22292 tree v2si_ftype_v4hi_v4hi
22293 = build_function_type_list (V2SI_type_node,
22294 V4HI_type_node, V4HI_type_node, NULL_TREE);
22295 tree v4hi_ftype_v4hi_int
22296 = build_function_type_list (V4HI_type_node,
22297 V4HI_type_node, integer_type_node, NULL_TREE);
22298 tree v2si_ftype_v2si_int
22299 = build_function_type_list (V2SI_type_node,
22300 V2SI_type_node, integer_type_node, NULL_TREE);
22301 tree v1di_ftype_v1di_int
22302 = build_function_type_list (V1DI_type_node,
22303 V1DI_type_node, integer_type_node, NULL_TREE);
22305 tree void_ftype_void
22306 = build_function_type (void_type_node, void_list_node);
22307 tree void_ftype_unsigned
22308 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22309 tree void_ftype_unsigned_unsigned
22310 = build_function_type_list (void_type_node, unsigned_type_node,
22311 unsigned_type_node, NULL_TREE);
22312 tree void_ftype_pcvoid_unsigned_unsigned
22313 = build_function_type_list (void_type_node, const_ptr_type_node,
22314 unsigned_type_node, unsigned_type_node,
22316 tree unsigned_ftype_void
22317 = build_function_type (unsigned_type_node, void_list_node);
22318 tree v2si_ftype_v4sf
22319 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22320 /* Loads/stores. */
22321 tree void_ftype_v8qi_v8qi_pchar
22322 = build_function_type_list (void_type_node,
22323 V8QI_type_node, V8QI_type_node,
22324 pchar_type_node, NULL_TREE);
22325 tree v4sf_ftype_pcfloat
22326 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22327 tree v4sf_ftype_v4sf_pcv2sf
22328 = build_function_type_list (V4SF_type_node,
22329 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22330 tree void_ftype_pv2sf_v4sf
22331 = build_function_type_list (void_type_node,
22332 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22333 tree void_ftype_pfloat_v4sf
22334 = build_function_type_list (void_type_node,
22335 pfloat_type_node, V4SF_type_node, NULL_TREE);
22336 tree void_ftype_pdi_di
22337 = build_function_type_list (void_type_node,
22338 pdi_type_node, long_long_unsigned_type_node,
22340 tree void_ftype_pv2di_v2di
22341 = build_function_type_list (void_type_node,
22342 pv2di_type_node, V2DI_type_node, NULL_TREE);
22343 /* Normal vector unops. */
22344 tree v4sf_ftype_v4sf
22345 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22346 tree v16qi_ftype_v16qi
22347 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22348 tree v8hi_ftype_v8hi
22349 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22350 tree v4si_ftype_v4si
22351 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22352 tree v8qi_ftype_v8qi
22353 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22354 tree v4hi_ftype_v4hi
22355 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22357 /* Normal vector binops. */
22358 tree v4sf_ftype_v4sf_v4sf
22359 = build_function_type_list (V4SF_type_node,
22360 V4SF_type_node, V4SF_type_node, NULL_TREE);
22361 tree v8qi_ftype_v8qi_v8qi
22362 = build_function_type_list (V8QI_type_node,
22363 V8QI_type_node, V8QI_type_node, NULL_TREE);
22364 tree v4hi_ftype_v4hi_v4hi
22365 = build_function_type_list (V4HI_type_node,
22366 V4HI_type_node, V4HI_type_node, NULL_TREE);
22367 tree v2si_ftype_v2si_v2si
22368 = build_function_type_list (V2SI_type_node,
22369 V2SI_type_node, V2SI_type_node, NULL_TREE);
22370 tree v1di_ftype_v1di_v1di
22371 = build_function_type_list (V1DI_type_node,
22372 V1DI_type_node, V1DI_type_node, NULL_TREE);
22373 tree v1di_ftype_v1di_v1di_int
22374 = build_function_type_list (V1DI_type_node,
22375 V1DI_type_node, V1DI_type_node,
22376 integer_type_node, NULL_TREE);
22377 tree v2si_ftype_v2sf
22378 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22379 tree v2sf_ftype_v2si
22380 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22381 tree v2si_ftype_v2si
22382 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22383 tree v2sf_ftype_v2sf
22384 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22385 tree v2sf_ftype_v2sf_v2sf
22386 = build_function_type_list (V2SF_type_node,
22387 V2SF_type_node, V2SF_type_node, NULL_TREE);
22388 tree v2si_ftype_v2sf_v2sf
22389 = build_function_type_list (V2SI_type_node,
22390 V2SF_type_node, V2SF_type_node, NULL_TREE);
22391 tree pint_type_node = build_pointer_type (integer_type_node);
22392 tree pdouble_type_node = build_pointer_type (double_type_node);
22393 tree pcdouble_type_node = build_pointer_type (
22394 build_type_variant (double_type_node, 1, 0));
22395 tree int_ftype_v2df_v2df
22396 = build_function_type_list (integer_type_node,
22397 V2DF_type_node, V2DF_type_node, NULL_TREE);
22399 tree void_ftype_pcvoid
22400 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22401 tree v4sf_ftype_v4si
22402 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22403 tree v4si_ftype_v4sf
22404 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22405 tree v2df_ftype_v4si
22406 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22407 tree v4si_ftype_v2df
22408 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22409 tree v4si_ftype_v2df_v2df
22410 = build_function_type_list (V4SI_type_node,
22411 V2DF_type_node, V2DF_type_node, NULL_TREE);
22412 tree v2si_ftype_v2df
22413 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22414 tree v4sf_ftype_v2df
22415 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22416 tree v2df_ftype_v2si
22417 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22418 tree v2df_ftype_v4sf
22419 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22420 tree int_ftype_v2df
22421 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22422 tree int64_ftype_v2df
22423 = build_function_type_list (long_long_integer_type_node,
22424 V2DF_type_node, NULL_TREE);
22425 tree v2df_ftype_v2df_int
22426 = build_function_type_list (V2DF_type_node,
22427 V2DF_type_node, integer_type_node, NULL_TREE);
22428 tree v2df_ftype_v2df_int64
22429 = build_function_type_list (V2DF_type_node,
22430 V2DF_type_node, long_long_integer_type_node,
22432 tree v4sf_ftype_v4sf_v2df
22433 = build_function_type_list (V4SF_type_node,
22434 V4SF_type_node, V2DF_type_node, NULL_TREE);
22435 tree v2df_ftype_v2df_v4sf
22436 = build_function_type_list (V2DF_type_node,
22437 V2DF_type_node, V4SF_type_node, NULL_TREE);
22438 tree v2df_ftype_v2df_v2df_int
22439 = build_function_type_list (V2DF_type_node,
22440 V2DF_type_node, V2DF_type_node,
22443 tree v2df_ftype_v2df_pcdouble
22444 = build_function_type_list (V2DF_type_node,
22445 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22446 tree void_ftype_pdouble_v2df
22447 = build_function_type_list (void_type_node,
22448 pdouble_type_node, V2DF_type_node, NULL_TREE);
22449 tree void_ftype_pint_int
22450 = build_function_type_list (void_type_node,
22451 pint_type_node, integer_type_node, NULL_TREE);
22452 tree void_ftype_v16qi_v16qi_pchar
22453 = build_function_type_list (void_type_node,
22454 V16QI_type_node, V16QI_type_node,
22455 pchar_type_node, NULL_TREE);
22456 tree v2df_ftype_pcdouble
22457 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22458 tree v2df_ftype_v2df_v2df
22459 = build_function_type_list (V2DF_type_node,
22460 V2DF_type_node, V2DF_type_node, NULL_TREE);
22461 tree v16qi_ftype_v16qi_v16qi
22462 = build_function_type_list (V16QI_type_node,
22463 V16QI_type_node, V16QI_type_node, NULL_TREE);
22464 tree v8hi_ftype_v8hi_v8hi
22465 = build_function_type_list (V8HI_type_node,
22466 V8HI_type_node, V8HI_type_node, NULL_TREE);
22467 tree v4si_ftype_v4si_v4si
22468 = build_function_type_list (V4SI_type_node,
22469 V4SI_type_node, V4SI_type_node, NULL_TREE);
22470 tree v2di_ftype_v2di_v2di
22471 = build_function_type_list (V2DI_type_node,
22472 V2DI_type_node, V2DI_type_node, NULL_TREE);
22473 tree v2di_ftype_v2df_v2df
22474 = build_function_type_list (V2DI_type_node,
22475 V2DF_type_node, V2DF_type_node, NULL_TREE);
22476 tree v2df_ftype_v2df
22477 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22478 tree v2di_ftype_v2di_int
22479 = build_function_type_list (V2DI_type_node,
22480 V2DI_type_node, integer_type_node, NULL_TREE);
22481 tree v2di_ftype_v2di_v2di_int
22482 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22483 V2DI_type_node, integer_type_node, NULL_TREE);
22484 tree v4si_ftype_v4si_int
22485 = build_function_type_list (V4SI_type_node,
22486 V4SI_type_node, integer_type_node, NULL_TREE);
22487 tree v8hi_ftype_v8hi_int
22488 = build_function_type_list (V8HI_type_node,
22489 V8HI_type_node, integer_type_node, NULL_TREE);
22490 tree v4si_ftype_v8hi_v8hi
22491 = build_function_type_list (V4SI_type_node,
22492 V8HI_type_node, V8HI_type_node, NULL_TREE);
22493 tree v1di_ftype_v8qi_v8qi
22494 = build_function_type_list (V1DI_type_node,
22495 V8QI_type_node, V8QI_type_node, NULL_TREE);
22496 tree v1di_ftype_v2si_v2si
22497 = build_function_type_list (V1DI_type_node,
22498 V2SI_type_node, V2SI_type_node, NULL_TREE);
22499 tree v2di_ftype_v16qi_v16qi
22500 = build_function_type_list (V2DI_type_node,
22501 V16QI_type_node, V16QI_type_node, NULL_TREE);
22502 tree v2di_ftype_v4si_v4si
22503 = build_function_type_list (V2DI_type_node,
22504 V4SI_type_node, V4SI_type_node, NULL_TREE);
22505 tree int_ftype_v16qi
22506 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22507 tree v16qi_ftype_pcchar
22508 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22509 tree void_ftype_pchar_v16qi
22510 = build_function_type_list (void_type_node,
22511 pchar_type_node, V16QI_type_node, NULL_TREE);
22513 tree v2di_ftype_v2di_unsigned_unsigned
22514 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22515 unsigned_type_node, unsigned_type_node,
22517 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22518 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22519 unsigned_type_node, unsigned_type_node,
22521 tree v2di_ftype_v2di_v16qi
22522 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22524 tree v2df_ftype_v2df_v2df_v2df
22525 = build_function_type_list (V2DF_type_node,
22526 V2DF_type_node, V2DF_type_node,
22527 V2DF_type_node, NULL_TREE);
22528 tree v4sf_ftype_v4sf_v4sf_v4sf
22529 = build_function_type_list (V4SF_type_node,
22530 V4SF_type_node, V4SF_type_node,
22531 V4SF_type_node, NULL_TREE);
22532 tree v8hi_ftype_v16qi
22533 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22535 tree v4si_ftype_v16qi
22536 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22538 tree v2di_ftype_v16qi
22539 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22541 tree v4si_ftype_v8hi
22542 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22544 tree v2di_ftype_v8hi
22545 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22547 tree v2di_ftype_v4si
22548 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22550 tree v2di_ftype_pv2di
22551 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22553 tree v16qi_ftype_v16qi_v16qi_int
22554 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22555 V16QI_type_node, integer_type_node,
22557 tree v16qi_ftype_v16qi_v16qi_v16qi
22558 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22559 V16QI_type_node, V16QI_type_node,
22561 tree v8hi_ftype_v8hi_v8hi_int
22562 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22563 V8HI_type_node, integer_type_node,
22565 tree v4si_ftype_v4si_v4si_int
22566 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22567 V4SI_type_node, integer_type_node,
22569 tree int_ftype_v2di_v2di
22570 = build_function_type_list (integer_type_node,
22571 V2DI_type_node, V2DI_type_node,
22573 tree int_ftype_v16qi_int_v16qi_int_int
22574 = build_function_type_list (integer_type_node,
22581 tree v16qi_ftype_v16qi_int_v16qi_int_int
22582 = build_function_type_list (V16QI_type_node,
22589 tree int_ftype_v16qi_v16qi_int
22590 = build_function_type_list (integer_type_node,
22596 /* SSE5 instructions */
22597 tree v2di_ftype_v2di_v2di_v2di
22598 = build_function_type_list (V2DI_type_node,
22604 tree v4si_ftype_v4si_v4si_v4si
22605 = build_function_type_list (V4SI_type_node,
22611 tree v4si_ftype_v4si_v4si_v2di
22612 = build_function_type_list (V4SI_type_node,
22618 tree v8hi_ftype_v8hi_v8hi_v8hi
22619 = build_function_type_list (V8HI_type_node,
22625 tree v8hi_ftype_v8hi_v8hi_v4si
22626 = build_function_type_list (V8HI_type_node,
22632 tree v2df_ftype_v2df_v2df_v16qi
22633 = build_function_type_list (V2DF_type_node,
22639 tree v4sf_ftype_v4sf_v4sf_v16qi
22640 = build_function_type_list (V4SF_type_node,
22646 tree v2di_ftype_v2di_si
22647 = build_function_type_list (V2DI_type_node,
22652 tree v4si_ftype_v4si_si
22653 = build_function_type_list (V4SI_type_node,
22658 tree v8hi_ftype_v8hi_si
22659 = build_function_type_list (V8HI_type_node,
22664 tree v16qi_ftype_v16qi_si
22665 = build_function_type_list (V16QI_type_node,
22669 tree v4sf_ftype_v4hi
22670 = build_function_type_list (V4SF_type_node,
22674 tree v4hi_ftype_v4sf
22675 = build_function_type_list (V4HI_type_node,
22679 tree v2di_ftype_v2di
22680 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22682 tree v16qi_ftype_v8hi_v8hi
22683 = build_function_type_list (V16QI_type_node,
22684 V8HI_type_node, V8HI_type_node,
22686 tree v8hi_ftype_v4si_v4si
22687 = build_function_type_list (V8HI_type_node,
22688 V4SI_type_node, V4SI_type_node,
22690 tree v8hi_ftype_v16qi_v16qi
22691 = build_function_type_list (V8HI_type_node,
22692 V16QI_type_node, V16QI_type_node,
22694 tree v4hi_ftype_v8qi_v8qi
22695 = build_function_type_list (V4HI_type_node,
22696 V8QI_type_node, V8QI_type_node,
22698 tree unsigned_ftype_unsigned_uchar
22699 = build_function_type_list (unsigned_type_node,
22700 unsigned_type_node,
22701 unsigned_char_type_node,
22703 tree unsigned_ftype_unsigned_ushort
22704 = build_function_type_list (unsigned_type_node,
22705 unsigned_type_node,
22706 short_unsigned_type_node,
22708 tree unsigned_ftype_unsigned_unsigned
22709 = build_function_type_list (unsigned_type_node,
22710 unsigned_type_node,
22711 unsigned_type_node,
22713 tree uint64_ftype_uint64_uint64
22714 = build_function_type_list (long_long_unsigned_type_node,
22715 long_long_unsigned_type_node,
22716 long_long_unsigned_type_node,
22718 tree float_ftype_float
22719 = build_function_type_list (float_type_node,
22724 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22726 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22728 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22730 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22732 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22734 tree v8sf_ftype_v8sf
22735 = build_function_type_list (V8SF_type_node,
22738 tree v8si_ftype_v8sf
22739 = build_function_type_list (V8SI_type_node,
22742 tree v8sf_ftype_v8si
22743 = build_function_type_list (V8SF_type_node,
22746 tree v4si_ftype_v4df
22747 = build_function_type_list (V4SI_type_node,
22750 tree v4df_ftype_v4df
22751 = build_function_type_list (V4DF_type_node,
22754 tree v4df_ftype_v4si
22755 = build_function_type_list (V4DF_type_node,
22758 tree v4df_ftype_v4sf
22759 = build_function_type_list (V4DF_type_node,
22762 tree v4sf_ftype_v4df
22763 = build_function_type_list (V4SF_type_node,
22766 tree v8sf_ftype_v8sf_v8sf
22767 = build_function_type_list (V8SF_type_node,
22768 V8SF_type_node, V8SF_type_node,
22770 tree v4df_ftype_v4df_v4df
22771 = build_function_type_list (V4DF_type_node,
22772 V4DF_type_node, V4DF_type_node,
22774 tree v8sf_ftype_v8sf_int
22775 = build_function_type_list (V8SF_type_node,
22776 V8SF_type_node, integer_type_node,
22778 tree v4si_ftype_v8si_int
22779 = build_function_type_list (V4SI_type_node,
22780 V8SI_type_node, integer_type_node,
22782 tree v4df_ftype_v4df_int
22783 = build_function_type_list (V4DF_type_node,
22784 V4DF_type_node, integer_type_node,
22786 tree v4sf_ftype_v8sf_int
22787 = build_function_type_list (V4SF_type_node,
22788 V8SF_type_node, integer_type_node,
22790 tree v2df_ftype_v4df_int
22791 = build_function_type_list (V2DF_type_node,
22792 V4DF_type_node, integer_type_node,
22794 tree v8sf_ftype_v8sf_v8sf_int
22795 = build_function_type_list (V8SF_type_node,
22796 V8SF_type_node, V8SF_type_node,
22799 tree v8sf_ftype_v8sf_v8sf_v8sf
22800 = build_function_type_list (V8SF_type_node,
22801 V8SF_type_node, V8SF_type_node,
22804 tree v4df_ftype_v4df_v4df_v4df
22805 = build_function_type_list (V4DF_type_node,
22806 V4DF_type_node, V4DF_type_node,
22809 tree v8si_ftype_v8si_v8si_int
22810 = build_function_type_list (V8SI_type_node,
22811 V8SI_type_node, V8SI_type_node,
22814 tree v4df_ftype_v4df_v4df_int
22815 = build_function_type_list (V4DF_type_node,
22816 V4DF_type_node, V4DF_type_node,
22819 tree v8sf_ftype_pcfloat
22820 = build_function_type_list (V8SF_type_node,
22823 tree v4df_ftype_pcdouble
22824 = build_function_type_list (V4DF_type_node,
22825 pcdouble_type_node,
22827 tree pcv4sf_type_node
22828 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22829 tree pcv2df_type_node
22830 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22831 tree v8sf_ftype_pcv4sf
22832 = build_function_type_list (V8SF_type_node,
22835 tree v4df_ftype_pcv2df
22836 = build_function_type_list (V4DF_type_node,
22839 tree v32qi_ftype_pcchar
22840 = build_function_type_list (V32QI_type_node,
22843 tree void_ftype_pchar_v32qi
22844 = build_function_type_list (void_type_node,
22845 pchar_type_node, V32QI_type_node,
22847 tree v8si_ftype_v8si_v4si_int
22848 = build_function_type_list (V8SI_type_node,
22849 V8SI_type_node, V4SI_type_node,
22852 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22853 tree void_ftype_pv4di_v4di
22854 = build_function_type_list (void_type_node,
22855 pv4di_type_node, V4DI_type_node,
22857 tree v8sf_ftype_v8sf_v4sf_int
22858 = build_function_type_list (V8SF_type_node,
22859 V8SF_type_node, V4SF_type_node,
22862 tree v4df_ftype_v4df_v2df_int
22863 = build_function_type_list (V4DF_type_node,
22864 V4DF_type_node, V2DF_type_node,
22867 tree void_ftype_pfloat_v8sf
22868 = build_function_type_list (void_type_node,
22869 pfloat_type_node, V8SF_type_node,
22871 tree void_ftype_pdouble_v4df
22872 = build_function_type_list (void_type_node,
22873 pdouble_type_node, V4DF_type_node,
22875 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22876 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22877 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22878 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22879 tree pcv8sf_type_node
22880 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22881 tree pcv4df_type_node
22882 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22883 tree v8sf_ftype_pcv8sf_v8sf
22884 = build_function_type_list (V8SF_type_node,
22885 pcv8sf_type_node, V8SF_type_node,
22887 tree v4df_ftype_pcv4df_v4df
22888 = build_function_type_list (V4DF_type_node,
22889 pcv4df_type_node, V4DF_type_node,
22891 tree v4sf_ftype_pcv4sf_v4sf
22892 = build_function_type_list (V4SF_type_node,
22893 pcv4sf_type_node, V4SF_type_node,
22895 tree v2df_ftype_pcv2df_v2df
22896 = build_function_type_list (V2DF_type_node,
22897 pcv2df_type_node, V2DF_type_node,
22899 tree void_ftype_pv8sf_v8sf_v8sf
22900 = build_function_type_list (void_type_node,
22901 pv8sf_type_node, V8SF_type_node,
22904 tree void_ftype_pv4df_v4df_v4df
22905 = build_function_type_list (void_type_node,
22906 pv4df_type_node, V4DF_type_node,
22909 tree void_ftype_pv4sf_v4sf_v4sf
22910 = build_function_type_list (void_type_node,
22911 pv4sf_type_node, V4SF_type_node,
22914 tree void_ftype_pv2df_v2df_v2df
22915 = build_function_type_list (void_type_node,
22916 pv2df_type_node, V2DF_type_node,
22919 tree v4df_ftype_v2df
22920 = build_function_type_list (V4DF_type_node,
22923 tree v8sf_ftype_v4sf
22924 = build_function_type_list (V8SF_type_node,
22927 tree v8si_ftype_v4si
22928 = build_function_type_list (V8SI_type_node,
22931 tree v2df_ftype_v4df
22932 = build_function_type_list (V2DF_type_node,
22935 tree v4sf_ftype_v8sf
22936 = build_function_type_list (V4SF_type_node,
22939 tree v4si_ftype_v8si
22940 = build_function_type_list (V4SI_type_node,
22943 tree int_ftype_v4df
22944 = build_function_type_list (integer_type_node,
22947 tree int_ftype_v8sf
22948 = build_function_type_list (integer_type_node,
22951 tree int_ftype_v8sf_v8sf
22952 = build_function_type_list (integer_type_node,
22953 V8SF_type_node, V8SF_type_node,
22955 tree int_ftype_v4di_v4di
22956 = build_function_type_list (integer_type_node,
22957 V4DI_type_node, V4DI_type_node,
22959 tree int_ftype_v4df_v4df
22960 = build_function_type_list (integer_type_node,
22961 V4DF_type_node, V4DF_type_node,
22963 tree v8sf_ftype_v8sf_v8si
22964 = build_function_type_list (V8SF_type_node,
22965 V8SF_type_node, V8SI_type_node,
22967 tree v4df_ftype_v4df_v4di
22968 = build_function_type_list (V4DF_type_node,
22969 V4DF_type_node, V4DI_type_node,
22971 tree v4sf_ftype_v4sf_v4si
22972 = build_function_type_list (V4SF_type_node,
22973 V4SF_type_node, V4SI_type_node, NULL_TREE);
22974 tree v2df_ftype_v2df_v2di
22975 = build_function_type_list (V2DF_type_node,
22976 V2DF_type_node, V2DI_type_node, NULL_TREE);
22980 /* Add all special builtins with variable number of operands. */
22981 for (i = 0, d = bdesc_special_args;
22982 i < ARRAY_SIZE (bdesc_special_args);
22990 switch ((enum ix86_special_builtin_type) d->flag)
22992 case VOID_FTYPE_VOID:
22993 type = void_ftype_void;
22995 case V32QI_FTYPE_PCCHAR:
22996 type = v32qi_ftype_pcchar;
22998 case V16QI_FTYPE_PCCHAR:
22999 type = v16qi_ftype_pcchar;
23001 case V8SF_FTYPE_PCV4SF:
23002 type = v8sf_ftype_pcv4sf;
23004 case V8SF_FTYPE_PCFLOAT:
23005 type = v8sf_ftype_pcfloat;
23007 case V4DF_FTYPE_PCV2DF:
23008 type = v4df_ftype_pcv2df;
23010 case V4DF_FTYPE_PCDOUBLE:
23011 type = v4df_ftype_pcdouble;
23013 case V4SF_FTYPE_PCFLOAT:
23014 type = v4sf_ftype_pcfloat;
23016 case V2DI_FTYPE_PV2DI:
23017 type = v2di_ftype_pv2di;
23019 case V2DF_FTYPE_PCDOUBLE:
23020 type = v2df_ftype_pcdouble;
23022 case V8SF_FTYPE_PCV8SF_V8SF:
23023 type = v8sf_ftype_pcv8sf_v8sf;
23025 case V4DF_FTYPE_PCV4DF_V4DF:
23026 type = v4df_ftype_pcv4df_v4df;
23028 case V4SF_FTYPE_V4SF_PCV2SF:
23029 type = v4sf_ftype_v4sf_pcv2sf;
23031 case V4SF_FTYPE_PCV4SF_V4SF:
23032 type = v4sf_ftype_pcv4sf_v4sf;
23034 case V2DF_FTYPE_V2DF_PCDOUBLE:
23035 type = v2df_ftype_v2df_pcdouble;
23037 case V2DF_FTYPE_PCV2DF_V2DF:
23038 type = v2df_ftype_pcv2df_v2df;
23040 case VOID_FTYPE_PV2SF_V4SF:
23041 type = void_ftype_pv2sf_v4sf;
23043 case VOID_FTYPE_PV4DI_V4DI:
23044 type = void_ftype_pv4di_v4di;
23046 case VOID_FTYPE_PV2DI_V2DI:
23047 type = void_ftype_pv2di_v2di;
23049 case VOID_FTYPE_PCHAR_V32QI:
23050 type = void_ftype_pchar_v32qi;
23052 case VOID_FTYPE_PCHAR_V16QI:
23053 type = void_ftype_pchar_v16qi;
23055 case VOID_FTYPE_PFLOAT_V8SF:
23056 type = void_ftype_pfloat_v8sf;
23058 case VOID_FTYPE_PFLOAT_V4SF:
23059 type = void_ftype_pfloat_v4sf;
23061 case VOID_FTYPE_PDOUBLE_V4DF:
23062 type = void_ftype_pdouble_v4df;
23064 case VOID_FTYPE_PDOUBLE_V2DF:
23065 type = void_ftype_pdouble_v2df;
23067 case VOID_FTYPE_PDI_DI:
23068 type = void_ftype_pdi_di;
23070 case VOID_FTYPE_PINT_INT:
23071 type = void_ftype_pint_int;
23073 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23074 type = void_ftype_pv8sf_v8sf_v8sf;
23076 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23077 type = void_ftype_pv4df_v4df_v4df;
23079 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23080 type = void_ftype_pv4sf_v4sf_v4sf;
23082 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23083 type = void_ftype_pv2df_v2df_v2df;
23086 gcc_unreachable ();
23089 def_builtin (d->mask, d->name, type, d->code);
23092 /* Add all builtins with variable number of operands. */
23093 for (i = 0, d = bdesc_args;
23094 i < ARRAY_SIZE (bdesc_args);
23102 switch ((enum ix86_builtin_type) d->flag)
23104 case FLOAT_FTYPE_FLOAT:
23105 type = float_ftype_float;
23107 case INT_FTYPE_V8SF_V8SF_PTEST:
23108 type = int_ftype_v8sf_v8sf;
23110 case INT_FTYPE_V4DI_V4DI_PTEST:
23111 type = int_ftype_v4di_v4di;
23113 case INT_FTYPE_V4DF_V4DF_PTEST:
23114 type = int_ftype_v4df_v4df;
23116 case INT_FTYPE_V4SF_V4SF_PTEST:
23117 type = int_ftype_v4sf_v4sf;
23119 case INT_FTYPE_V2DI_V2DI_PTEST:
23120 type = int_ftype_v2di_v2di;
23122 case INT_FTYPE_V2DF_V2DF_PTEST:
23123 type = int_ftype_v2df_v2df;
23125 case INT64_FTYPE_V4SF:
23126 type = int64_ftype_v4sf;
23128 case INT64_FTYPE_V2DF:
23129 type = int64_ftype_v2df;
23131 case INT_FTYPE_V16QI:
23132 type = int_ftype_v16qi;
23134 case INT_FTYPE_V8QI:
23135 type = int_ftype_v8qi;
23137 case INT_FTYPE_V8SF:
23138 type = int_ftype_v8sf;
23140 case INT_FTYPE_V4DF:
23141 type = int_ftype_v4df;
23143 case INT_FTYPE_V4SF:
23144 type = int_ftype_v4sf;
23146 case INT_FTYPE_V2DF:
23147 type = int_ftype_v2df;
23149 case V16QI_FTYPE_V16QI:
23150 type = v16qi_ftype_v16qi;
23152 case V8SI_FTYPE_V8SF:
23153 type = v8si_ftype_v8sf;
23155 case V8SI_FTYPE_V4SI:
23156 type = v8si_ftype_v4si;
23158 case V8HI_FTYPE_V8HI:
23159 type = v8hi_ftype_v8hi;
23161 case V8HI_FTYPE_V16QI:
23162 type = v8hi_ftype_v16qi;
23164 case V8QI_FTYPE_V8QI:
23165 type = v8qi_ftype_v8qi;
23167 case V8SF_FTYPE_V8SF:
23168 type = v8sf_ftype_v8sf;
23170 case V8SF_FTYPE_V8SI:
23171 type = v8sf_ftype_v8si;
23173 case V8SF_FTYPE_V4SF:
23174 type = v8sf_ftype_v4sf;
23176 case V4SI_FTYPE_V4DF:
23177 type = v4si_ftype_v4df;
23179 case V4SI_FTYPE_V4SI:
23180 type = v4si_ftype_v4si;
23182 case V4SI_FTYPE_V16QI:
23183 type = v4si_ftype_v16qi;
23185 case V4SI_FTYPE_V8SI:
23186 type = v4si_ftype_v8si;
23188 case V4SI_FTYPE_V8HI:
23189 type = v4si_ftype_v8hi;
23191 case V4SI_FTYPE_V4SF:
23192 type = v4si_ftype_v4sf;
23194 case V4SI_FTYPE_V2DF:
23195 type = v4si_ftype_v2df;
23197 case V4HI_FTYPE_V4HI:
23198 type = v4hi_ftype_v4hi;
23200 case V4DF_FTYPE_V4DF:
23201 type = v4df_ftype_v4df;
23203 case V4DF_FTYPE_V4SI:
23204 type = v4df_ftype_v4si;
23206 case V4DF_FTYPE_V4SF:
23207 type = v4df_ftype_v4sf;
23209 case V4DF_FTYPE_V2DF:
23210 type = v4df_ftype_v2df;
23212 case V4SF_FTYPE_V4SF:
23213 case V4SF_FTYPE_V4SF_VEC_MERGE:
23214 type = v4sf_ftype_v4sf;
23216 case V4SF_FTYPE_V8SF:
23217 type = v4sf_ftype_v8sf;
23219 case V4SF_FTYPE_V4SI:
23220 type = v4sf_ftype_v4si;
23222 case V4SF_FTYPE_V4DF:
23223 type = v4sf_ftype_v4df;
23225 case V4SF_FTYPE_V2DF:
23226 type = v4sf_ftype_v2df;
23228 case V2DI_FTYPE_V2DI:
23229 type = v2di_ftype_v2di;
23231 case V2DI_FTYPE_V16QI:
23232 type = v2di_ftype_v16qi;
23234 case V2DI_FTYPE_V8HI:
23235 type = v2di_ftype_v8hi;
23237 case V2DI_FTYPE_V4SI:
23238 type = v2di_ftype_v4si;
23240 case V2SI_FTYPE_V2SI:
23241 type = v2si_ftype_v2si;
23243 case V2SI_FTYPE_V4SF:
23244 type = v2si_ftype_v4sf;
23246 case V2SI_FTYPE_V2DF:
23247 type = v2si_ftype_v2df;
23249 case V2SI_FTYPE_V2SF:
23250 type = v2si_ftype_v2sf;
23252 case V2DF_FTYPE_V4DF:
23253 type = v2df_ftype_v4df;
23255 case V2DF_FTYPE_V4SF:
23256 type = v2df_ftype_v4sf;
23258 case V2DF_FTYPE_V2DF:
23259 case V2DF_FTYPE_V2DF_VEC_MERGE:
23260 type = v2df_ftype_v2df;
23262 case V2DF_FTYPE_V2SI:
23263 type = v2df_ftype_v2si;
23265 case V2DF_FTYPE_V4SI:
23266 type = v2df_ftype_v4si;
23268 case V2SF_FTYPE_V2SF:
23269 type = v2sf_ftype_v2sf;
23271 case V2SF_FTYPE_V2SI:
23272 type = v2sf_ftype_v2si;
23274 case V16QI_FTYPE_V16QI_V16QI:
23275 type = v16qi_ftype_v16qi_v16qi;
23277 case V16QI_FTYPE_V8HI_V8HI:
23278 type = v16qi_ftype_v8hi_v8hi;
23280 case V8QI_FTYPE_V8QI_V8QI:
23281 type = v8qi_ftype_v8qi_v8qi;
23283 case V8QI_FTYPE_V4HI_V4HI:
23284 type = v8qi_ftype_v4hi_v4hi;
23286 case V8HI_FTYPE_V8HI_V8HI:
23287 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23288 type = v8hi_ftype_v8hi_v8hi;
23290 case V8HI_FTYPE_V16QI_V16QI:
23291 type = v8hi_ftype_v16qi_v16qi;
23293 case V8HI_FTYPE_V4SI_V4SI:
23294 type = v8hi_ftype_v4si_v4si;
23296 case V8HI_FTYPE_V8HI_SI_COUNT:
23297 type = v8hi_ftype_v8hi_int;
23299 case V8SF_FTYPE_V8SF_V8SF:
23300 type = v8sf_ftype_v8sf_v8sf;
23302 case V8SF_FTYPE_V8SF_V8SI:
23303 type = v8sf_ftype_v8sf_v8si;
23305 case V4SI_FTYPE_V4SI_V4SI:
23306 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23307 type = v4si_ftype_v4si_v4si;
23309 case V4SI_FTYPE_V8HI_V8HI:
23310 type = v4si_ftype_v8hi_v8hi;
23312 case V4SI_FTYPE_V4SF_V4SF:
23313 type = v4si_ftype_v4sf_v4sf;
23315 case V4SI_FTYPE_V2DF_V2DF:
23316 type = v4si_ftype_v2df_v2df;
23318 case V4SI_FTYPE_V4SI_SI_COUNT:
23319 type = v4si_ftype_v4si_int;
23321 case V4HI_FTYPE_V4HI_V4HI:
23322 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23323 type = v4hi_ftype_v4hi_v4hi;
23325 case V4HI_FTYPE_V8QI_V8QI:
23326 type = v4hi_ftype_v8qi_v8qi;
23328 case V4HI_FTYPE_V2SI_V2SI:
23329 type = v4hi_ftype_v2si_v2si;
23331 case V4HI_FTYPE_V4HI_SI_COUNT:
23332 type = v4hi_ftype_v4hi_int;
23334 case V4DF_FTYPE_V4DF_V4DF:
23335 type = v4df_ftype_v4df_v4df;
23337 case V4DF_FTYPE_V4DF_V4DI:
23338 type = v4df_ftype_v4df_v4di;
23340 case V4SF_FTYPE_V4SF_V4SF:
23341 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23342 type = v4sf_ftype_v4sf_v4sf;
23344 case V4SF_FTYPE_V4SF_V4SI:
23345 type = v4sf_ftype_v4sf_v4si;
23347 case V4SF_FTYPE_V4SF_V2SI:
23348 type = v4sf_ftype_v4sf_v2si;
23350 case V4SF_FTYPE_V4SF_V2DF:
23351 type = v4sf_ftype_v4sf_v2df;
23353 case V4SF_FTYPE_V4SF_DI:
23354 type = v4sf_ftype_v4sf_int64;
23356 case V4SF_FTYPE_V4SF_SI:
23357 type = v4sf_ftype_v4sf_int;
23359 case V2DI_FTYPE_V2DI_V2DI:
23360 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23361 type = v2di_ftype_v2di_v2di;
23363 case V2DI_FTYPE_V16QI_V16QI:
23364 type = v2di_ftype_v16qi_v16qi;
23366 case V2DI_FTYPE_V4SI_V4SI:
23367 type = v2di_ftype_v4si_v4si;
23369 case V2DI_FTYPE_V2DI_V16QI:
23370 type = v2di_ftype_v2di_v16qi;
23372 case V2DI_FTYPE_V2DF_V2DF:
23373 type = v2di_ftype_v2df_v2df;
23375 case V2DI_FTYPE_V2DI_SI_COUNT:
23376 type = v2di_ftype_v2di_int;
23378 case V2SI_FTYPE_V2SI_V2SI:
23379 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23380 type = v2si_ftype_v2si_v2si;
23382 case V2SI_FTYPE_V4HI_V4HI:
23383 type = v2si_ftype_v4hi_v4hi;
23385 case V2SI_FTYPE_V2SF_V2SF:
23386 type = v2si_ftype_v2sf_v2sf;
23388 case V2SI_FTYPE_V2SI_SI_COUNT:
23389 type = v2si_ftype_v2si_int;
23391 case V2DF_FTYPE_V2DF_V2DF:
23392 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23393 type = v2df_ftype_v2df_v2df;
23395 case V2DF_FTYPE_V2DF_V4SF:
23396 type = v2df_ftype_v2df_v4sf;
23398 case V2DF_FTYPE_V2DF_V2DI:
23399 type = v2df_ftype_v2df_v2di;
23401 case V2DF_FTYPE_V2DF_DI:
23402 type = v2df_ftype_v2df_int64;
23404 case V2DF_FTYPE_V2DF_SI:
23405 type = v2df_ftype_v2df_int;
23407 case V2SF_FTYPE_V2SF_V2SF:
23408 type = v2sf_ftype_v2sf_v2sf;
23410 case V1DI_FTYPE_V1DI_V1DI:
23411 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23412 type = v1di_ftype_v1di_v1di;
23414 case V1DI_FTYPE_V8QI_V8QI:
23415 type = v1di_ftype_v8qi_v8qi;
23417 case V1DI_FTYPE_V2SI_V2SI:
23418 type = v1di_ftype_v2si_v2si;
23420 case V1DI_FTYPE_V1DI_SI_COUNT:
23421 type = v1di_ftype_v1di_int;
23423 case UINT64_FTYPE_UINT64_UINT64:
23424 type = uint64_ftype_uint64_uint64;
23426 case UINT_FTYPE_UINT_UINT:
23427 type = unsigned_ftype_unsigned_unsigned;
23429 case UINT_FTYPE_UINT_USHORT:
23430 type = unsigned_ftype_unsigned_ushort;
23432 case UINT_FTYPE_UINT_UCHAR:
23433 type = unsigned_ftype_unsigned_uchar;
23435 case V8HI_FTYPE_V8HI_INT:
23436 type = v8hi_ftype_v8hi_int;
23438 case V8SF_FTYPE_V8SF_INT:
23439 type = v8sf_ftype_v8sf_int;
23441 case V4SI_FTYPE_V4SI_INT:
23442 type = v4si_ftype_v4si_int;
23444 case V4SI_FTYPE_V8SI_INT:
23445 type = v4si_ftype_v8si_int;
23447 case V4HI_FTYPE_V4HI_INT:
23448 type = v4hi_ftype_v4hi_int;
23450 case V4DF_FTYPE_V4DF_INT:
23451 type = v4df_ftype_v4df_int;
23453 case V4SF_FTYPE_V4SF_INT:
23454 type = v4sf_ftype_v4sf_int;
23456 case V4SF_FTYPE_V8SF_INT:
23457 type = v4sf_ftype_v8sf_int;
23459 case V2DI_FTYPE_V2DI_INT:
23460 case V2DI2TI_FTYPE_V2DI_INT:
23461 type = v2di_ftype_v2di_int;
23463 case V2DF_FTYPE_V2DF_INT:
23464 type = v2df_ftype_v2df_int;
23466 case V2DF_FTYPE_V4DF_INT:
23467 type = v2df_ftype_v4df_int;
23469 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23470 type = v16qi_ftype_v16qi_v16qi_v16qi;
23472 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23473 type = v8sf_ftype_v8sf_v8sf_v8sf;
23475 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23476 type = v4df_ftype_v4df_v4df_v4df;
23478 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23479 type = v4sf_ftype_v4sf_v4sf_v4sf;
23481 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23482 type = v2df_ftype_v2df_v2df_v2df;
23484 case V16QI_FTYPE_V16QI_V16QI_INT:
23485 type = v16qi_ftype_v16qi_v16qi_int;
23487 case V8SI_FTYPE_V8SI_V8SI_INT:
23488 type = v8si_ftype_v8si_v8si_int;
23490 case V8SI_FTYPE_V8SI_V4SI_INT:
23491 type = v8si_ftype_v8si_v4si_int;
23493 case V8HI_FTYPE_V8HI_V8HI_INT:
23494 type = v8hi_ftype_v8hi_v8hi_int;
23496 case V8SF_FTYPE_V8SF_V8SF_INT:
23497 type = v8sf_ftype_v8sf_v8sf_int;
23499 case V8SF_FTYPE_V8SF_V4SF_INT:
23500 type = v8sf_ftype_v8sf_v4sf_int;
23502 case V4SI_FTYPE_V4SI_V4SI_INT:
23503 type = v4si_ftype_v4si_v4si_int;
23505 case V4DF_FTYPE_V4DF_V4DF_INT:
23506 type = v4df_ftype_v4df_v4df_int;
23508 case V4DF_FTYPE_V4DF_V2DF_INT:
23509 type = v4df_ftype_v4df_v2df_int;
23511 case V4SF_FTYPE_V4SF_V4SF_INT:
23512 type = v4sf_ftype_v4sf_v4sf_int;
23514 case V2DI_FTYPE_V2DI_V2DI_INT:
23515 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23516 type = v2di_ftype_v2di_v2di_int;
23518 case V2DF_FTYPE_V2DF_V2DF_INT:
23519 type = v2df_ftype_v2df_v2df_int;
23521 case V2DI_FTYPE_V2DI_UINT_UINT:
23522 type = v2di_ftype_v2di_unsigned_unsigned;
23524 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23525 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23527 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23528 type = v1di_ftype_v1di_v1di_int;
23531 gcc_unreachable ();
23534 def_builtin_const (d->mask, d->name, type, d->code);
23537 /* pcmpestr[im] insns. */
23538 for (i = 0, d = bdesc_pcmpestr;
23539 i < ARRAY_SIZE (bdesc_pcmpestr);
23542 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23543 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23545 ftype = int_ftype_v16qi_int_v16qi_int_int;
23546 def_builtin_const (d->mask, d->name, ftype, d->code);
23549 /* pcmpistr[im] insns. */
23550 for (i = 0, d = bdesc_pcmpistr;
23551 i < ARRAY_SIZE (bdesc_pcmpistr);
23554 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23555 ftype = v16qi_ftype_v16qi_v16qi_int;
23557 ftype = int_ftype_v16qi_v16qi_int;
23558 def_builtin_const (d->mask, d->name, ftype, d->code);
23561 /* comi/ucomi insns. */
23562 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23563 if (d->mask == OPTION_MASK_ISA_SSE2)
23564 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23566 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23569 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23570 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23572 /* SSE or 3DNow!A */
23573 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23576 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23578 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23579 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23582 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23583 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23586 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23587 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23588 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23589 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23590 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23591 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23594 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23597 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23598 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23600 /* Access to the vec_init patterns. */
23601 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23602 integer_type_node, NULL_TREE);
23603 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23605 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23606 short_integer_type_node,
23607 short_integer_type_node,
23608 short_integer_type_node, NULL_TREE);
23609 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23611 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23612 char_type_node, char_type_node,
23613 char_type_node, char_type_node,
23614 char_type_node, char_type_node,
23615 char_type_node, NULL_TREE);
23616 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23618 /* Access to the vec_extract patterns. */
23619 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23620 integer_type_node, NULL_TREE);
23621 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23623 ftype = build_function_type_list (long_long_integer_type_node,
23624 V2DI_type_node, integer_type_node,
23626 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23628 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23629 integer_type_node, NULL_TREE);
23630 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23632 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23633 integer_type_node, NULL_TREE);
23634 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23636 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23637 integer_type_node, NULL_TREE);
23638 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23640 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23641 integer_type_node, NULL_TREE);
23642 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23644 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23645 integer_type_node, NULL_TREE);
23646 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23648 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23649 integer_type_node, NULL_TREE);
23650 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23652 /* Access to the vec_set patterns. */
23653 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23655 integer_type_node, NULL_TREE);
23656 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23658 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23660 integer_type_node, NULL_TREE);
23661 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23663 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23665 integer_type_node, NULL_TREE);
23666 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23668 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23670 integer_type_node, NULL_TREE);
23671 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23673 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23675 integer_type_node, NULL_TREE);
23676 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23678 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23680 integer_type_node, NULL_TREE);
23681 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23683 /* Add SSE5 multi-arg argument instructions */
23684 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23686 tree mtype = NULL_TREE;
23691 switch ((enum multi_arg_type)d->flag)
23693 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23694 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23695 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23696 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23697 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23698 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23699 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23700 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23701 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23702 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23703 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23704 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23705 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23706 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23707 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23708 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23709 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23710 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23711 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23712 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23713 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23714 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23715 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23716 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23717 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23718 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23719 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23720 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23721 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23722 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23723 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23724 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23725 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23726 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23727 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23728 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23729 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23730 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23731 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23732 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23733 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23734 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23735 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23736 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23737 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23738 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23739 case MULTI_ARG_UNKNOWN:
23741 gcc_unreachable ();
23745 def_builtin_const (d->mask, d->name, mtype, d->code);
23749 /* Internal method for ix86_init_builtins. */
/* Register the ABI-specific variable-argument builtins
   __builtin_ms_va_{start,end,copy} and __builtin_sysv_va_{start,end,copy}.
   Each builtin is tagged with the corresponding calling-convention
   attribute ("ms_abi" / "sysv_abi") so the va_list handling matches the
   ABI of the function it appears in.
   NOTE(review): this chunk is a non-contiguous extraction -- the embedded
   original line numbers jump (e.g. 23758 -> 23762, 23764 -> 23766), so
   several statements (including some left-hand-side assignments, an
   apparent early-return guard, and the closing brace) are elided here.
   Comments below describe only what is visible; confirm against the
   full source.  */
23752 ix86_init_builtins_va_builtins_abi (void)
/* Function types for the end/start/copy builtins of each ABI, plus the
   attribute lists attached to them.  */
23754   tree ms_va_ref, sysv_va_ref;
23755   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23756   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23757   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23758   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Build the attribute lists used to mark each builtin with its ABI.  */
23762   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23763   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* ms_va_list is passed by reference; the sysv va_list reference type is
   derived from the pointed-to type of sysv_va_list_type_node.
   NOTE(review): the assignment target for the next build_pointer_type
   call (presumably sysv_va_ref =) is on an elided line.  */
23764   ms_va_ref = build_reference_type (ms_va_list_type_node);
23766     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* va_end takes a fixed (va_list) argument; va_start is varargs beyond it.
   NOTE(review): the assignment target for the first call below
   (presumably fnvoid_va_end_ms =) is on an elided line.  */
23769     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23770   fnvoid_va_start_ms =
23771     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23772   fnvoid_va_end_sysv =
23773     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23774   fnvoid_va_start_sysv =
23775     build_varargs_function_type_list (void_type_node, sysv_va_ref,
/* va_copy takes a destination reference and a source va_list value.  */
23777   fnvoid_va_copy_ms =
23778     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23780   fnvoid_va_copy_sysv =
23781     build_function_type_list (void_type_node, sysv_va_ref,
23782 			       sysv_va_ref, NULL_TREE);
/* Register the six builtins, mapping each onto the generic
   BUILT_IN_VA_START/END/COPY codes and attaching the ABI attribute.  */
23784   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23785   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23786   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23787   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23788   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23789 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23790   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23791   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23792   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23793   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23794   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23795 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Top-level target hook body: register the x86-specific builtin types
   (__float80, __float128) and builtin functions, then delegate to the
   MMX/SSE and per-ABI va_list initializers.
   NOTE(review): elided listing -- intervening lines are missing.  */
23799 ix86_init_builtins (void)
23801 tree float128_type_node = make_node (REAL_TYPE);
23804 /* The __float80 type. */
/* When long double is already the 80-bit XFmode type, just expose it
   under the builtin name rather than creating a new node.  */
23805 if (TYPE_MODE (long_double_type_node) == XFmode)
23806 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23810 /* The __float80 type. */
23811 tree float80_type_node = make_node (REAL_TYPE);
23813 TYPE_PRECISION (float80_type_node) = 80;
23814 layout_type (float80_type_node);
23815 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23819 /* The __float128 type. */
23820 TYPE_PRECISION (float128_type_node) = 128;
23821 layout_type (float128_type_node);
23822 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23825 /* TFmode support builtins. */
23826 ftype = build_function_type (float128_type_node, void_list_node);
23827 decl = add_builtin_function ("__builtin_infq", ftype,
23828 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23830 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23832 decl = add_builtin_function ("__builtin_huge_valq", ftype,
23833 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
23835 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
23837 /* We will expand them to normal call if SSE2 isn't available since
23838 they are used by libgcc. */
23839 ftype = build_function_type_list (float128_type_node,
23840 float128_type_node,
/* __builtin_fabsq falls back to the libgcc routine __fabstf2.  */
23842 decl = add_builtin_function ("__builtin_fabsq", ftype,
23843 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23844 "__fabstf2", NULL_TREE);
23845 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure value computation; mark const so calls can be CSEd.  */
23846 TREE_READONLY (decl) = 1;
23848 ftype = build_function_type_list (float128_type_node,
23849 float128_type_node,
23850 float128_type_node,
/* __builtin_copysignq falls back to the libgcc routine __copysigntf3.  */
23852 decl = add_builtin_function ("__builtin_copysignq", ftype,
23853 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23854 "__copysigntf3", NULL_TREE);
23855 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23856 TREE_READONLY (decl) = 1;
23858 ix86_init_mmx_sse_builtins ();
23860 ix86_init_builtins_va_builtins_abi ();
23863 /* Errors in the source file can cause expand_expr to return const0_rtx
23864 where we expect a vector. To avoid crashing, use one of the vector
23865 clear instructions. */
/* Return X unchanged unless it is the scalar const0_rtx, in which case
   substitute the all-zero vector constant of MODE.  */
23867 safe_vector_operand (rtx x, enum machine_mode mode)
23869 if (x == const0_rtx)
23870 x = CONST0_RTX (mode);
23874 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin call EXP via insn pattern ICODE,
   producing the result in TARGET (or a fresh pseudo when TARGET is
   unusable).  NOTE(review): elided listing -- the tail of the function
   (pattern emit and return) is not visible here.  */
23877 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23880 tree arg0 = CALL_EXPR_ARG (exp, 0);
23881 tree arg1 = CALL_EXPR_ARG (exp, 1);
23882 rtx op0 = expand_normal (arg0);
23883 rtx op1 = expand_normal (arg1);
23884 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23885 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23886 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Replace scalar zero with a vector zero where a vector is required.  */
23888 if (VECTOR_MODE_P (mode0))
23889 op0 = safe_vector_operand (op0, mode0);
23890 if (VECTOR_MODE_P (mode1))
23891 op1 = safe_vector_operand (op1, mode1);
/* Pick a fresh register when TARGET is absent or of the wrong mode, or
   whenever optimizing (fresh pseudos help the RTL optimizers).  */
23893 if (optimize || !target
23894 || GET_MODE (target) != tmode
23895 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23896 target = gen_reg_rtx (tmode);
/* An SImode shift count feeding a TImode operand is widened by loading
   it into a V4SI register and reinterpreting as TImode.  */
23898 if (GET_MODE (op1) == SImode && mode1 == TImode)
23900 rtx x = gen_reg_rtx (V4SImode);
23901 emit_insn (gen_sse2_loadd (x, op1));
23902 op1 = gen_lowpart (TImode, x);
/* Force operands into registers if the insn predicates reject them.  */
23905 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23906 op0 = copy_to_mode_reg (mode0, op0);
23907 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23908 op1 = copy_to_mode_reg (mode1, op1);
23910 pat = GEN_FCN (icode) (target, op0, op1);
23919 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand a multi-argument (SSE5/XOP-style) builtin: M_TYPE selects the
   argument count and flavor (plain, immediate last arg, comparison, or
   test form); SUB_CODE supplies the rtx comparison/sub-operation code.
   NOTE(review): elided listing -- lines between non-consecutive
   original line numbers (including nargs assignments inside the switch)
   are missing from view.  */
23922 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23923 enum multi_arg_type m_type,
23924 enum rtx_code sub_code)
23929 bool comparison_p = false;
23931 bool last_arg_constant = false;
23932 int num_memory = 0;
23935 enum machine_mode mode;
23938 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: the (elided) statements in each group presumably set
   nargs and the flags declared above -- TODO confirm against full source.  */
23942 case MULTI_ARG_3_SF:
23943 case MULTI_ARG_3_DF:
23944 case MULTI_ARG_3_DI:
23945 case MULTI_ARG_3_SI:
23946 case MULTI_ARG_3_SI_DI:
23947 case MULTI_ARG_3_HI:
23948 case MULTI_ARG_3_HI_SI:
23949 case MULTI_ARG_3_QI:
23950 case MULTI_ARG_3_PERMPS:
23951 case MULTI_ARG_3_PERMPD:
23955 case MULTI_ARG_2_SF:
23956 case MULTI_ARG_2_DF:
23957 case MULTI_ARG_2_DI:
23958 case MULTI_ARG_2_SI:
23959 case MULTI_ARG_2_HI:
23960 case MULTI_ARG_2_QI:
/* Two-operand forms whose last argument must be an immediate.  */
23964 case MULTI_ARG_2_DI_IMM:
23965 case MULTI_ARG_2_SI_IMM:
23966 case MULTI_ARG_2_HI_IMM:
23967 case MULTI_ARG_2_QI_IMM:
23969 last_arg_constant = true;
23972 case MULTI_ARG_1_SF:
23973 case MULTI_ARG_1_DF:
23974 case MULTI_ARG_1_DI:
23975 case MULTI_ARG_1_SI:
23976 case MULTI_ARG_1_HI:
23977 case MULTI_ARG_1_QI:
23978 case MULTI_ARG_1_SI_DI:
23979 case MULTI_ARG_1_HI_DI:
23980 case MULTI_ARG_1_HI_SI:
23981 case MULTI_ARG_1_QI_DI:
23982 case MULTI_ARG_1_QI_SI:
23983 case MULTI_ARG_1_QI_HI:
23984 case MULTI_ARG_1_PH2PS:
23985 case MULTI_ARG_1_PS2PH:
/* Comparison forms: the insn takes an extra comparison-rtx operand.  */
23989 case MULTI_ARG_2_SF_CMP:
23990 case MULTI_ARG_2_DF_CMP:
23991 case MULTI_ARG_2_DI_CMP:
23992 case MULTI_ARG_2_SI_CMP:
23993 case MULTI_ARG_2_HI_CMP:
23994 case MULTI_ARG_2_QI_CMP:
23996 comparison_p = true;
23999 case MULTI_ARG_2_SF_TF:
24000 case MULTI_ARG_2_DF_TF:
24001 case MULTI_ARG_2_DI_TF:
24002 case MULTI_ARG_2_SI_TF:
24003 case MULTI_ARG_2_HI_TF:
24004 case MULTI_ARG_2_QI_TF:
24009 case MULTI_ARG_UNKNOWN:
24011 gcc_unreachable ();
/* As in the other expanders: take a fresh pseudo unless TARGET fits.  */
24014 if (optimize || !target
24015 || GET_MODE (target) != tmode
24016 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24017 target = gen_reg_rtx (tmode);
24019 gcc_assert (nargs <= 4);
24021 for (i = 0; i < nargs; i++)
24023 tree arg = CALL_EXPR_ARG (exp, i);
24024 rtx op = expand_normal (arg);
/* For comparison insns operand 1 of the pattern is the comparison rtx,
   so argument i maps to pattern operand i+2 instead of i+1.  */
24025 int adjust = (comparison_p) ? 1 : 0;
24026 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24028 if (last_arg_constant && i == nargs-1)
24030 if (GET_CODE (op) != CONST_INT)
24032 error ("last argument must be an immediate");
/* Error recovery: return a dummy register so expansion can continue.  */
24033 return gen_reg_rtx (tmode);
24038 if (VECTOR_MODE_P (mode))
24039 op = safe_vector_operand (op, mode);
24041 /* If we aren't optimizing, only allow one memory operand to be
24043 if (memory_operand (op, mode))
24046 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24049 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24051 op = force_reg (mode, op);
24055 args[i].mode = mode;
/* Emit the pattern; the shape depends on arity and comparison-ness.  */
24061 pat = GEN_FCN (icode) (target, args[0].op);
24066 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24067 GEN_INT ((int)sub_code));
24068 else if (! comparison_p)
24069 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24072 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24076 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24081 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24085 gcc_unreachable ();
24095 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24096 insns with vec_merge. */
/* Expand a one-argument builtin whose insn pattern additionally merges
   the result into a copy of the input (scalar ops on vector regs).
   NOTE(review): elided listing -- op1's initialization (around original
   line 24119) is not visible; it presumably aliases op0 -- TODO confirm.  */
24099 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24103 tree arg0 = CALL_EXPR_ARG (exp, 0);
24104 rtx op1, op0 = expand_normal (arg0);
24105 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24106 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24108 if (optimize || !target
24109 || GET_MODE (target) != tmode
24110 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24111 target = gen_reg_rtx (tmode);
24113 if (VECTOR_MODE_P (mode0))
24114 op0 = safe_vector_operand (op0, mode0);
24116 if ((optimize && !register_operand (op0, mode0))
24117 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24118 op0 = copy_to_mode_reg (mode0, op0);
24121 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24122 op1 = copy_to_mode_reg (mode0, op1);
24124 pat = GEN_FCN (icode) (target, op0, op1);
24131 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand an SSE compare builtin described by D.  When SWAP is set the
   two operands are exchanged (used for comparisons the hardware only
   implements in one direction).  NOTE(review): elided listing -- part
   of the swap sequence and the final emit/return are not visible.  */
24134 ix86_expand_sse_compare (const struct builtin_description *d,
24135 tree exp, rtx target, bool swap)
24138 tree arg0 = CALL_EXPR_ARG (exp, 0);
24139 tree arg1 = CALL_EXPR_ARG (exp, 1);
24140 rtx op0 = expand_normal (arg0);
24141 rtx op1 = expand_normal (arg1);
24143 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24144 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24145 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24146 enum rtx_code comparison = d->comparison;
24148 if (VECTOR_MODE_P (mode0))
24149 op0 = safe_vector_operand (op0, mode0);
24150 if (VECTOR_MODE_P (mode1))
24151 op1 = safe_vector_operand (op1, mode1);
24153 /* Swap operands if we have a comparison that isn't available in
/* Copy op1 through a scratch register while exchanging the operands.  */
24157 rtx tmp = gen_reg_rtx (mode1);
24158 emit_move_insn (tmp, op1);
24163 if (optimize || !target
24164 || GET_MODE (target) != tmode
24165 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24166 target = gen_reg_rtx (tmode);
24168 if ((optimize && !register_operand (op0, mode0))
24169 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24170 op0 = copy_to_mode_reg (mode0, op0);
24171 if ((optimize && !register_operand (op1, mode1))
24172 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24173 op1 = copy_to_mode_reg (mode1, op1);
/* The insn takes the comparison rtx itself as a third input operand.  */
24175 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24176 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24183 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a COMIS/UCOMIS-style builtin: emit the compare insn and then
   materialize the flag result into the low byte of an SImode pseudo,
   returning that SImode register.  NOTE(review): elided listing.  */
24186 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24190 tree arg0 = CALL_EXPR_ARG (exp, 0);
24191 tree arg1 = CALL_EXPR_ARG (exp, 1);
24192 rtx op0 = expand_normal (arg0);
24193 rtx op1 = expand_normal (arg1);
24194 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24195 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24196 enum rtx_code comparison = d->comparison;
24198 if (VECTOR_MODE_P (mode0))
24199 op0 = safe_vector_operand (op0, mode0);
24200 if (VECTOR_MODE_P (mode1))
24201 op1 = safe_vector_operand (op1, mode1);
24203 /* Swap operands if we have a comparison that isn't available in
24205 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Zero the full SImode result first, then write only its low QImode
   part via STRICT_LOW_PART so the upper bits stay zero.  */
24212 target = gen_reg_rtx (SImode);
24213 emit_move_insn (target, const0_rtx);
24214 target = gen_rtx_SUBREG (QImode, target, 0);
24216 if ((optimize && !register_operand (op0, mode0))
24217 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24218 op0 = copy_to_mode_reg (mode0, op0);
24219 if ((optimize && !register_operand (op1, mode1))
24220 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24221 op1 = copy_to_mode_reg (mode1, op1);
24223 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the comparison of the flags register.  */
24227 emit_insn (gen_rtx_SET (VOIDmode,
24228 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24229 gen_rtx_fmt_ee (comparison, QImode,
24233 return SUBREG_REG (target);
24236 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a PTEST/VTESTP* builtin: emit the test insn and convert the
   resulting flag (D->comparison) into a 0/1 value in an SImode pseudo.
   Mirrors ix86_expand_sse_comi.  NOTE(review): elided listing.  */
24239 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24243 tree arg0 = CALL_EXPR_ARG (exp, 0);
24244 tree arg1 = CALL_EXPR_ARG (exp, 1);
24245 rtx op0 = expand_normal (arg0);
24246 rtx op1 = expand_normal (arg1);
24247 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24248 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24249 enum rtx_code comparison = d->comparison;
24251 if (VECTOR_MODE_P (mode0))
24252 op0 = safe_vector_operand (op0, mode0);
24253 if (VECTOR_MODE_P (mode1))
24254 op1 = safe_vector_operand (op1, mode1);
/* Zero the SImode result, then write only the low byte (see comi).  */
24256 target = gen_reg_rtx (SImode);
24257 emit_move_insn (target, const0_rtx);
24258 target = gen_rtx_SUBREG (QImode, target, 0);
24260 if ((optimize && !register_operand (op0, mode0))
24261 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24262 op0 = copy_to_mode_reg (mode0, op0);
24263 if ((optimize && !register_operand (op1, mode1))
24264 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24265 op1 = copy_to_mode_reg (mode1, op1);
24267 pat = GEN_FCN (d->icode) (op0, op1);
24271 emit_insn (gen_rtx_SET (VOIDmode,
24272 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24273 gen_rtx_fmt_ee (comparison, QImode,
24277 return SUBREG_REG (target);
24280 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a five-argument PCMPESTRI/PCMPESTRM builtin.  The insn pattern
   has two outputs (index and mask); depending on D->code one of them is
   the real result and the other goes to a scratch, or (for the flag
   variants) both are scratches and a flag bit is extracted.
   NOTE(review): elided listing -- lines between non-consecutive
   original line numbers are missing from view.  */
24283 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24284 tree exp, rtx target)
24287 tree arg0 = CALL_EXPR_ARG (exp, 0);
24288 tree arg1 = CALL_EXPR_ARG (exp, 1);
24289 tree arg2 = CALL_EXPR_ARG (exp, 2);
24290 tree arg3 = CALL_EXPR_ARG (exp, 3);
24291 tree arg4 = CALL_EXPR_ARG (exp, 4);
24292 rtx scratch0, scratch1;
24293 rtx op0 = expand_normal (arg0);
24294 rtx op1 = expand_normal (arg1);
24295 rtx op2 = expand_normal (arg2);
24296 rtx op3 = expand_normal (arg3);
24297 rtx op4 = expand_normal (arg4);
24298 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24300 tmode0 = insn_data[d->icode].operand[0].mode;
24301 tmode1 = insn_data[d->icode].operand[1].mode;
24302 modev2 = insn_data[d->icode].operand[2].mode;
24303 modei3 = insn_data[d->icode].operand[3].mode;
24304 modev4 = insn_data[d->icode].operand[4].mode;
24305 modei5 = insn_data[d->icode].operand[5].mode;
24306 modeimm = insn_data[d->icode].operand[6].mode;
24308 if (VECTOR_MODE_P (modev2))
24309 op0 = safe_vector_operand (op0, modev2);
24310 if (VECTOR_MODE_P (modev4))
24311 op2 = safe_vector_operand (op2, modev4);
/* Force the string/length operands into registers as the predicates
   require.  */
24313 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24314 op0 = copy_to_mode_reg (modev2, op0);
24315 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24316 op1 = copy_to_mode_reg (modei3, op1)
24317 if ((optimize && !register_operand (op2, modev4))
24318 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24319 op2 = copy_to_mode_reg (modev4, op2);
24320 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24321 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be a compile-time 8-bit immediate.  */
24323 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
/* Fixed diagnostic: "a 8-bit" -> "an 8-bit", matching the wording used
   by the other immediate-operand diagnostics in this file.  */
24325 error ("the fifth argument must be an 8-bit immediate");
24329 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24331 if (optimize || !target
24332 || GET_MODE (target) != tmode0
24333 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24334 target = gen_reg_rtx (tmode0);
24336 scratch1 = gen_reg_rtx (tmode1);
24338 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24340 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24342 if (optimize || !target
24343 || GET_MODE (target) != tmode1
24344 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24345 target = gen_reg_rtx (tmode1);
24347 scratch0 = gen_reg_rtx (tmode0);
24349 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both insn outputs are dead; the result comes from the
   EFLAGS bit identified by d->flag.  */
24353 gcc_assert (d->flag);
24355 scratch0 = gen_reg_rtx (tmode0);
24356 scratch1 = gen_reg_rtx (tmode1);
24358 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24368 target = gen_reg_rtx (SImode);
24369 emit_move_insn (target, const0_rtx);
24370 target = gen_rtx_SUBREG (QImode, target, 0);
24373 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24374 gen_rtx_fmt_ee (EQ, QImode,
24375 gen_rtx_REG ((enum machine_mode) d->flag,
24378 return SUBREG_REG (target);
24385 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a three-argument PCMPISTRI/PCMPISTRM builtin.  Implicit-length
   counterpart of ix86_expand_sse_pcmpestr: same two-output pattern, no
   explicit length operands.  NOTE(review): elided listing -- lines
   between non-consecutive original line numbers are missing.  */
24388 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24389 tree exp, rtx target)
24392 tree arg0 = CALL_EXPR_ARG (exp, 0);
24393 tree arg1 = CALL_EXPR_ARG (exp, 1);
24394 tree arg2 = CALL_EXPR_ARG (exp, 2);
24395 rtx scratch0, scratch1;
24396 rtx op0 = expand_normal (arg0);
24397 rtx op1 = expand_normal (arg1);
24398 rtx op2 = expand_normal (arg2);
24399 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24401 tmode0 = insn_data[d->icode].operand[0].mode;
24402 tmode1 = insn_data[d->icode].operand[1].mode;
24403 modev2 = insn_data[d->icode].operand[2].mode;
24404 modev3 = insn_data[d->icode].operand[3].mode;
24405 modeimm = insn_data[d->icode].operand[4].mode;
24407 if (VECTOR_MODE_P (modev2))
24408 op0 = safe_vector_operand (op0, modev2);
24409 if (VECTOR_MODE_P (modev3))
24410 op1 = safe_vector_operand (op1, modev3);
24412 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24413 op0 = copy_to_mode_reg (modev2, op0);
24414 if ((optimize && !register_operand (op1, modev3))
24415 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24416 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be a compile-time 8-bit immediate.  */
24418 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
/* Fixed diagnostic: "a 8-bit" -> "an 8-bit", matching the wording used
   by the other immediate-operand diagnostics in this file.  */
24420 error ("the third argument must be an 8-bit immediate");
24424 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24426 if (optimize || !target
24427 || GET_MODE (target) != tmode0
24428 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24429 target = gen_reg_rtx (tmode0);
24431 scratch1 = gen_reg_rtx (tmode1);
24433 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24435 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24437 if (optimize || !target
24438 || GET_MODE (target) != tmode1
24439 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24440 target = gen_reg_rtx (tmode1);
24442 scratch0 = gen_reg_rtx (tmode0);
24444 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: result is the EFLAGS bit identified by d->flag.  */
24448 gcc_assert (d->flag);
24450 scratch0 = gen_reg_rtx (tmode0);
24451 scratch1 = gen_reg_rtx (tmode1);
24453 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24463 target = gen_reg_rtx (SImode);
24464 emit_move_insn (target, const0_rtx);
24465 target = gen_rtx_SUBREG (QImode, target, 0);
24468 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24469 gen_rtx_fmt_ee (EQ, QImode,
24470 gen_rtx_REG ((enum machine_mode) d->flag,
24473 return SUBREG_REG (target);
24479 /* Subroutine of ix86_expand_builtin to take care of insns with
24480 variable number of operands. */
/* Generic expander driven by D->flag (an ix86_builtin_type): dispatches
   ptest/vec_merge/compare forms to their dedicated helpers, otherwise
   classifies arity, immediate operands, shift-count handling and result
   mode, then validates each argument against the insn predicates and
   emits the pattern.  NOTE(review): elided listing -- lines between
   non-consecutive original line numbers (including the nargs/rmode
   assignments inside the big switch) are missing from view.  */
24483 ix86_expand_args_builtin (const struct builtin_description *d,
24484 tree exp, rtx target)
24486 rtx pat, real_target;
24487 unsigned int i, nargs;
24488 unsigned int nargs_constant = 0;
24489 int num_memory = 0;
24493 enum machine_mode mode;
24495 bool last_arg_count = false;
24496 enum insn_code icode = d->icode;
24497 const struct insn_data *insn_p = &insn_data[icode];
24498 enum machine_mode tmode = insn_p->operand[0].mode;
24499 enum machine_mode rmode = VOIDmode;
24501 enum rtx_code comparison = d->comparison;
24503 switch ((enum ix86_builtin_type) d->flag)
/* PTEST forms are handled entirely by the ptest helper.  */
24505 case INT_FTYPE_V8SF_V8SF_PTEST:
24506 case INT_FTYPE_V4DI_V4DI_PTEST:
24507 case INT_FTYPE_V4DF_V4DF_PTEST:
24508 case INT_FTYPE_V4SF_V4SF_PTEST:
24509 case INT_FTYPE_V2DI_V2DI_PTEST:
24510 case INT_FTYPE_V2DF_V2DF_PTEST:
24511 return ix86_expand_sse_ptest (d, exp, target);
/* One-argument (unary) forms.  */
24512 case FLOAT128_FTYPE_FLOAT128:
24513 case FLOAT_FTYPE_FLOAT:
24514 case INT64_FTYPE_V4SF:
24515 case INT64_FTYPE_V2DF:
24516 case INT_FTYPE_V16QI:
24517 case INT_FTYPE_V8QI:
24518 case INT_FTYPE_V8SF:
24519 case INT_FTYPE_V4DF:
24520 case INT_FTYPE_V4SF:
24521 case INT_FTYPE_V2DF:
24522 case V16QI_FTYPE_V16QI:
24523 case V8SI_FTYPE_V8SF:
24524 case V8SI_FTYPE_V4SI:
24525 case V8HI_FTYPE_V8HI:
24526 case V8HI_FTYPE_V16QI:
24527 case V8QI_FTYPE_V8QI:
24528 case V8SF_FTYPE_V8SF:
24529 case V8SF_FTYPE_V8SI:
24530 case V8SF_FTYPE_V4SF:
24531 case V4SI_FTYPE_V4SI:
24532 case V4SI_FTYPE_V16QI:
24533 case V4SI_FTYPE_V4SF:
24534 case V4SI_FTYPE_V8SI:
24535 case V4SI_FTYPE_V8HI:
24536 case V4SI_FTYPE_V4DF:
24537 case V4SI_FTYPE_V2DF:
24538 case V4HI_FTYPE_V4HI:
24539 case V4DF_FTYPE_V4DF:
24540 case V4DF_FTYPE_V4SI:
24541 case V4DF_FTYPE_V4SF:
24542 case V4DF_FTYPE_V2DF:
24543 case V4SF_FTYPE_V4SF:
24544 case V4SF_FTYPE_V4SI:
24545 case V4SF_FTYPE_V8SF:
24546 case V4SF_FTYPE_V4DF:
24547 case V4SF_FTYPE_V2DF:
24548 case V2DI_FTYPE_V2DI:
24549 case V2DI_FTYPE_V16QI:
24550 case V2DI_FTYPE_V8HI:
24551 case V2DI_FTYPE_V4SI:
24552 case V2DF_FTYPE_V2DF:
24553 case V2DF_FTYPE_V4SI:
24554 case V2DF_FTYPE_V4DF:
24555 case V2DF_FTYPE_V4SF:
24556 case V2DF_FTYPE_V2SI:
24557 case V2SI_FTYPE_V2SI:
24558 case V2SI_FTYPE_V4SF:
24559 case V2SI_FTYPE_V2SF:
24560 case V2SI_FTYPE_V2DF:
24561 case V2SF_FTYPE_V2SF:
24562 case V2SF_FTYPE_V2SI:
/* Scalar unops with vec_merge go to their dedicated helper.  */
24565 case V4SF_FTYPE_V4SF_VEC_MERGE:
24566 case V2DF_FTYPE_V2DF_VEC_MERGE:
24567 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Two-argument forms; plain binops short-circuit to the binop helper
   when no comparison is involved.  */
24568 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24569 case V16QI_FTYPE_V16QI_V16QI:
24570 case V16QI_FTYPE_V8HI_V8HI:
24571 case V8QI_FTYPE_V8QI_V8QI:
24572 case V8QI_FTYPE_V4HI_V4HI:
24573 case V8HI_FTYPE_V8HI_V8HI:
24574 case V8HI_FTYPE_V16QI_V16QI:
24575 case V8HI_FTYPE_V4SI_V4SI:
24576 case V8SF_FTYPE_V8SF_V8SF:
24577 case V8SF_FTYPE_V8SF_V8SI:
24578 case V4SI_FTYPE_V4SI_V4SI:
24579 case V4SI_FTYPE_V8HI_V8HI:
24580 case V4SI_FTYPE_V4SF_V4SF:
24581 case V4SI_FTYPE_V2DF_V2DF:
24582 case V4HI_FTYPE_V4HI_V4HI:
24583 case V4HI_FTYPE_V8QI_V8QI:
24584 case V4HI_FTYPE_V2SI_V2SI:
24585 case V4DF_FTYPE_V4DF_V4DF:
24586 case V4DF_FTYPE_V4DF_V4DI:
24587 case V4SF_FTYPE_V4SF_V4SF:
24588 case V4SF_FTYPE_V4SF_V4SI:
24589 case V4SF_FTYPE_V4SF_V2SI:
24590 case V4SF_FTYPE_V4SF_V2DF:
24591 case V4SF_FTYPE_V4SF_DI:
24592 case V4SF_FTYPE_V4SF_SI:
24593 case V2DI_FTYPE_V2DI_V2DI:
24594 case V2DI_FTYPE_V16QI_V16QI:
24595 case V2DI_FTYPE_V4SI_V4SI:
24596 case V2DI_FTYPE_V2DI_V16QI:
24597 case V2DI_FTYPE_V2DF_V2DF:
24598 case V2SI_FTYPE_V2SI_V2SI:
24599 case V2SI_FTYPE_V4HI_V4HI:
24600 case V2SI_FTYPE_V2SF_V2SF:
24601 case V2DF_FTYPE_V2DF_V2DF:
24602 case V2DF_FTYPE_V2DF_V4SF:
24603 case V2DF_FTYPE_V2DF_V2DI:
24604 case V2DF_FTYPE_V2DF_DI:
24605 case V2DF_FTYPE_V2DF_SI:
24606 case V2SF_FTYPE_V2SF_V2SF:
24607 case V1DI_FTYPE_V1DI_V1DI:
24608 case V1DI_FTYPE_V8QI_V8QI:
24609 case V1DI_FTYPE_V2SI_V2SI:
24610 if (comparison == UNKNOWN)
24611 return ix86_expand_binop_builtin (icode, exp, target);
/* Swapped compares (e.g. GT implemented as swapped LT).  */
24614 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24615 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24616 gcc_assert (comparison != UNKNOWN);
/* SIMD shifts: the last argument is a count (register or 8-bit imm).  */
24620 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24621 case V8HI_FTYPE_V8HI_SI_COUNT:
24622 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24623 case V4SI_FTYPE_V4SI_SI_COUNT:
24624 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24625 case V4HI_FTYPE_V4HI_SI_COUNT:
24626 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24627 case V2DI_FTYPE_V2DI_SI_COUNT:
24628 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24629 case V2SI_FTYPE_V2SI_SI_COUNT:
24630 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24631 case V1DI_FTYPE_V1DI_SI_COUNT:
24633 last_arg_count = true;
24635 case UINT64_FTYPE_UINT64_UINT64:
24636 case UINT_FTYPE_UINT_UINT:
24637 case UINT_FTYPE_UINT_USHORT:
24638 case UINT_FTYPE_UINT_UCHAR:
/* Forms whose result mode (rmode) differs from the insn output mode;
   presumably rmode is set in the elided statements -- TODO confirm.  */
24641 case V2DI2TI_FTYPE_V2DI_INT:
24644 nargs_constant = 1;
24646 case V8HI_FTYPE_V8HI_INT:
24647 case V8SF_FTYPE_V8SF_INT:
24648 case V4SI_FTYPE_V4SI_INT:
24649 case V4SI_FTYPE_V8SI_INT:
24650 case V4HI_FTYPE_V4HI_INT:
24651 case V4DF_FTYPE_V4DF_INT:
24652 case V4SF_FTYPE_V4SF_INT:
24653 case V4SF_FTYPE_V8SF_INT:
24654 case V2DI_FTYPE_V2DI_INT:
24655 case V2DF_FTYPE_V2DF_INT:
24656 case V2DF_FTYPE_V4DF_INT:
24658 nargs_constant = 1;
24660 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24661 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24662 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24663 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24664 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24667 case V16QI_FTYPE_V16QI_V16QI_INT:
24668 case V8HI_FTYPE_V8HI_V8HI_INT:
24669 case V8SI_FTYPE_V8SI_V8SI_INT:
24670 case V8SI_FTYPE_V8SI_V4SI_INT:
24671 case V8SF_FTYPE_V8SF_V8SF_INT:
24672 case V8SF_FTYPE_V8SF_V4SF_INT:
24673 case V4SI_FTYPE_V4SI_V4SI_INT:
24674 case V4DF_FTYPE_V4DF_V4DF_INT:
24675 case V4DF_FTYPE_V4DF_V2DF_INT:
24676 case V4SF_FTYPE_V4SF_V4SF_INT:
24677 case V2DI_FTYPE_V2DI_V2DI_INT:
24678 case V2DF_FTYPE_V2DF_V2DF_INT:
24680 nargs_constant = 1;
24682 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24685 nargs_constant = 1;
24687 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24690 nargs_constant = 1;
24692 case V2DI_FTYPE_V2DI_UINT_UINT:
24694 nargs_constant = 2;
24696 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24698 nargs_constant = 2;
24701 gcc_unreachable ();
24704 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are always binary and go through the compare helper.  */
24706 if (comparison != UNKNOWN)
24708 gcc_assert (nargs == 2);
24709 return ix86_expand_sse_compare (d, exp, target, swap);
/* When the result mode differs from the insn output mode, compute into
   an rmode pseudo and view it as tmode via a paradoxical subreg.  */
24712 if (rmode == VOIDmode || rmode == tmode)
24716 || GET_MODE (target) != tmode
24717 || ! (*insn_p->operand[0].predicate) (target, tmode))
24718 target = gen_reg_rtx (tmode);
24719 real_target = target;
24723 target = gen_reg_rtx (rmode);
24724 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24727 for (i = 0; i < nargs; i++)
24729 tree arg = CALL_EXPR_ARG (exp, i);
24730 rtx op = expand_normal (arg);
24731 enum machine_mode mode = insn_p->operand[i + 1].mode;
24732 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24734 if (last_arg_count && (i + 1) == nargs)
24736 /* SIMD shift insns take either an 8-bit immediate or
24737 register as count. But builtin functions take int as
24738 count. If count doesn't match, we put it in register. */
24741 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24742 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24743 op = copy_to_reg (op);
/* Immediate arguments: diagnose out-of-range constants with the
   per-pattern required width.  */
24746 else if ((nargs - i) <= nargs_constant)
24751 case CODE_FOR_sse4_1_roundpd:
24752 case CODE_FOR_sse4_1_roundps:
24753 case CODE_FOR_sse4_1_roundsd:
24754 case CODE_FOR_sse4_1_roundss:
24755 case CODE_FOR_sse4_1_blendps:
24756 case CODE_FOR_avx_blendpd256:
24757 case CODE_FOR_avx_vpermilv4df:
24758 case CODE_FOR_avx_roundpd256:
24759 case CODE_FOR_avx_roundps256:
24760 error ("the last argument must be a 4-bit immediate");
24763 case CODE_FOR_sse4_1_blendpd:
24764 case CODE_FOR_avx_vpermilv2df:
24765 error ("the last argument must be a 2-bit immediate");
24768 case CODE_FOR_avx_vextractf128v4df:
24769 case CODE_FOR_avx_vextractf128v8sf:
24770 case CODE_FOR_avx_vextractf128v8si:
24771 case CODE_FOR_avx_vinsertf128v4df:
24772 case CODE_FOR_avx_vinsertf128v8sf:
24773 case CODE_FOR_avx_vinsertf128v8si:
24774 error ("the last argument must be a 1-bit immediate")
24777 case CODE_FOR_avx_cmpsdv2df3:
24778 case CODE_FOR_avx_cmpssv4sf3:
24779 case CODE_FOR_avx_cmppdv2df3:
24780 case CODE_FOR_avx_cmppsv4sf3:
24781 case CODE_FOR_avx_cmppdv4df3:
24782 case CODE_FOR_avx_cmppsv8sf3:
24783 error ("the last argument must be a 5-bit immediate")
24787 switch (nargs_constant)
24790 if ((nargs - i) == nargs_constant)
24792 error ("the next to last argument must be an 8-bit immediate");
24796 error ("the last argument must be an 8-bit immediate");
24799 gcc_unreachable ();
24806 if (VECTOR_MODE_P (mode))
24807 op = safe_vector_operand (op, mode);
24809 /* If we aren't optimizing, only allow one memory operand to
24811 if (memory_operand (op, mode))
24814 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24816 if (optimize || !match || num_memory > 1)
24817 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy into a register first, then view in MODE.  */
24821 op = copy_to_reg (op);
24822 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24827 args[i].mode = mode;
/* Emit the pattern with the validated operands (arity 1..4).  */
24833 pat = GEN_FCN (icode) (real_target, args[0].op);
24836 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24839 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24843 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24844 args[2].op, args[3].op);
24847 gcc_unreachable ();
24857 /* Subroutine of ix86_expand_builtin to take care of special insns
24858 with variable number of operands. */
/* Expander for builtins with memory semantics (loads/stores/masked
   moves): D->flag classifies the shape, deciding whether the builtin is
   a load (result in TARGET) or a store (first argument is the
   destination address), and which operand is the memory reference.
   NOTE(review): elided listing -- lines between non-consecutive
   original line numbers (klass/nargs assignments etc.) are missing.  */
24861 ix86_expand_special_args_builtin (const struct builtin_description *d,
24862 tree exp, rtx target)
24866 unsigned int i, nargs, arg_adjust, memory;
24870 enum machine_mode mode;
24872 enum insn_code icode = d->icode;
24873 bool last_arg_constant = false;
24874 const struct insn_data *insn_p = &insn_data[icode];
24875 enum machine_mode tmode = insn_p->operand[0].mode;
24876 enum { load, store } klass;
24878 switch ((enum ix86_special_builtin_type) d->flag)
24880 case VOID_FTYPE_VOID:
/* No-operand builtins (e.g. fences) are emitted directly.  */
24881 emit_insn (GEN_FCN (icode) (target));
/* Pointer-source loads.  */
24883 case V2DI_FTYPE_PV2DI:
24884 case V32QI_FTYPE_PCCHAR:
24885 case V16QI_FTYPE_PCCHAR:
24886 case V8SF_FTYPE_PCV4SF:
24887 case V8SF_FTYPE_PCFLOAT:
24888 case V4SF_FTYPE_PCFLOAT:
24889 case V4DF_FTYPE_PCV2DF:
24890 case V4DF_FTYPE_PCDOUBLE:
24891 case V2DF_FTYPE_PCDOUBLE:
/* Pointer-destination stores; the memory operand is the target.  */
24896 case VOID_FTYPE_PV2SF_V4SF:
24897 case VOID_FTYPE_PV4DI_V4DI:
24898 case VOID_FTYPE_PV2DI_V2DI:
24899 case VOID_FTYPE_PCHAR_V32QI:
24900 case VOID_FTYPE_PCHAR_V16QI:
24901 case VOID_FTYPE_PFLOAT_V8SF:
24902 case VOID_FTYPE_PFLOAT_V4SF:
24903 case VOID_FTYPE_PDOUBLE_V4DF:
24904 case VOID_FTYPE_PDOUBLE_V2DF:
24905 case VOID_FTYPE_PDI_DI:
24906 case VOID_FTYPE_PINT_INT:
24909 /* Reserve memory operand for target. */
24910 memory = ARRAY_SIZE (args);
24912 case V4SF_FTYPE_V4SF_PCV2SF:
24913 case V2DF_FTYPE_V2DF_PCDOUBLE:
/* Masked/merging loads: register source plus memory source.  */
24918 case V8SF_FTYPE_PCV8SF_V8SF:
24919 case V4DF_FTYPE_PCV4DF_V4DF:
24920 case V4SF_FTYPE_PCV4SF_V4SF:
24921 case V2DF_FTYPE_PCV2DF_V2DF:
/* Masked stores: memory destination plus two register sources.  */
24926 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24927 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24928 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24929 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24932 /* Reserve memory operand for target. */
24933 memory = ARRAY_SIZE (args);
24936 gcc_unreachable ();
24939 gcc_assert (nargs <= ARRAY_SIZE (args));
24941 if (klass == store)
/* For stores, argument 0 is the destination address: wrap it in a MEM
   and use it as the insn's output operand.  */
24943 arg = CALL_EXPR_ARG (exp, 0);
24944 op = expand_normal (arg);
24945 gcc_assert (target == 0);
24946 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24954 || GET_MODE (target) != tmode
24955 || ! (*insn_p->operand[0].predicate) (target, tmode))
24956 target = gen_reg_rtx (tmode);
24959 for (i = 0; i < nargs; i++)
24961 enum machine_mode mode = insn_p->operand[i + 1].mode;
/* arg_adjust skips the store-destination argument consumed above.  */
24964 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24965 op = expand_normal (arg);
24966 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24968 if (last_arg_constant && (i + 1) == nargs)
24974 error ("the last argument must be an 8-bit immediate");
24982 /* This must be the memory operand. */
24983 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24984 gcc_assert (GET_MODE (op) == mode
24985 || GET_MODE (op) == VOIDmode);
24989 /* This must be register. */
24990 if (VECTOR_MODE_P (mode))
24991 op = safe_vector_operand (op, mode);
24993 gcc_assert (GET_MODE (op) == mode
24994 || GET_MODE (op) == VOIDmode);
24995 op = copy_to_mode_reg (mode, op);
25000 args[i].mode = mode;
25006 pat = GEN_FCN (icode) (target, args[0].op);
25009 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25012 gcc_unreachable ();
/* Stores produce no value; loads return the (possibly fresh) target.  */
25018 return klass == store ? 0 : target;
25021 /* Return the integer constant in ARG. Constrain it to be in the range
25022 of the subparts of VEC_TYPE; issue an error if not. */
/* ARG must be a host-representable unsigned integer constant no larger
   than TYPE_VECTOR_SUBPARTS (VEC_TYPE) - 1; diagnose otherwise.
   NOTE(review): the success/failure return statements are elided.  */
25025 get_element_number (tree vec_type, tree arg)
25027 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25029 if (!host_integerp (arg, 1)
25030 || (elt = tree_low_cst (arg, 1), elt > max))
25032 error ("selector must be an integer constant in the range 0..%wi", max);
25039 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25040 ix86_expand_vector_init. We DO have language-level syntax for this, in
25041 the form of (type){ init-list }. Except that since we can't place emms
25042 instructions from inside the compiler, we can't allow the use of MMX
25043 registers unless the user explicitly asks for it. So we do *not* define
25044 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25045 we have builtins invoked by mmintrin.h that gives us license to emit
25046 these sorts of instructions. */
/* Expand a vec-init builtin: gather one scalar per vector element into
   a PARALLEL and hand it to ix86_expand_vector_init.  */
25049 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25051 enum machine_mode tmode = TYPE_MODE (type);
25052 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25053 int i, n_elt = GET_MODE_NUNITS (tmode);
25054 rtvec v = rtvec_alloc (n_elt);
25056 gcc_assert (VECTOR_MODE_P (tmode));
/* The builtin takes exactly one scalar argument per element.  */
25057 gcc_assert (call_expr_nargs (exp) == n_elt);
25059 for (i = 0; i < n_elt; ++i)
25061 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25062 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25065 if (!target || !register_operand (target, tmode))
25066 target = gen_reg_rtx (tmode);
25068 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25072 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25073 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25074 had a language-level syntax for referencing vector elements. */
/* NOTE(review): return type, local declarations (arg0/arg1/op0/elt),
   braces, and the trailing `return target;` are elided here. */
25077 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25079 enum machine_mode tmode, mode0;
25084 arg0 = CALL_EXPR_ARG (exp, 0);
25085 arg1 = CALL_EXPR_ARG (exp, 1);
25087 op0 = expand_normal (arg0);
/* ARG1 must be a constant selector; get_element_number range-checks it. */
25088 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the whole-vector mode of ARG0. */
25090 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25091 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25092 gcc_assert (VECTOR_MODE_P (mode0));
25094 op0 = force_reg (mode0, op0);
25096 if (optimize || !target || !register_operand (target, tmode))
25097 target = gen_reg_rtx (tmode);
25099 ix86_expand_vector_extract (true, target, op0, elt);
25104 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25105 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25106 a language-level syntax for referencing vector elements. */
/* NOTE(review): return type, the `elt` declaration, braces, and the final
   `return target;` are elided in this excerpt. */
25109 ix86_expand_vec_set_builtin (tree exp)
25111 enum machine_mode tmode, mode1;
25112 tree arg0, arg1, arg2;
25114 rtx op0, op1, target;
25116 arg0 = CALL_EXPR_ARG (exp, 0);
25117 arg1 = CALL_EXPR_ARG (exp, 1);
25118 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode of ARG0, MODE1 its element mode. */
25120 tmode = TYPE_MODE (TREE_TYPE (arg0));
25121 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25122 gcc_assert (VECTOR_MODE_P (tmode));
25124 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25125 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 is the constant element index, range-checked against TMODE. */
25126 elt = get_element_number (TREE_TYPE (arg0), arg2);
/* Widen/narrow the scalar to the element mode if the expander gave us
   something else (VOIDmode means a constant, which needs no conversion). */
25128 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25129 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25131 op0 = force_reg (tmode, op0);
25132 op1 = force_reg (mode1, op1);
25134 /* OP0 is the source of these builtin functions and shouldn't be
25135 modified. Create a copy, use it and return it as target. */
25136 target = gen_reg_rtx (tmode);
25137 emit_move_insn (target, op0);
25138 ix86_expand_vector_set (true, target, op1, elt);
25143 /* Expand an expression EXP that calls a built-in function,
25144 with result going to TARGET if that's convenient
25145 (and in mode MODE if that's convenient).
25146 SUBTARGET may be used as the target for computing one of EXP's operands.
25147 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): this excerpt elides many interior lines -- the return
   type, braces, the `switch (fcode)` header, `break;`/`return` statements
   at the end of each case, and several whole arms.  Treat the structure
   below as representative, not complete. */
25150 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25151 enum machine_mode mode ATTRIBUTE_UNUSED,
25152 int ignore ATTRIBUTE_UNUSED)
25154 const struct builtin_description *d;
25156 enum insn_code icode;
25157 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25158 tree arg0, arg1, arg2;
25159 rtx op0, op1, op2, pat;
25160 enum machine_mode mode0, mode1, mode2;
25161 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25163 /* Determine whether the builtin function is available under the current ISA.
25164 Originally the builtin was not created if it wasn't applicable to the
25165 current ISA based on the command line switches. With function specific
25166 options, we need to check in the context of the function making the call
25167 whether it is supported. */
25168 if (ix86_builtins_isa[fcode].isa
25169 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
/* Build the option string naming the ISA this builtin requires, so the
   error message can tell the user which -m flag to add. */
25171 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25172 NULL, NULL, false);
25175 error ("%qE needs unknown isa option", fndecl);
25178 gcc_assert (opts != NULL);
25179 error ("%qE needs isa option %s", fndecl, opts);
/* Irregular builtins that need hand expansion; the `switch (fcode)`
   header is elided in this excerpt. */
25187 case IX86_BUILTIN_MASKMOVQ:
25188 case IX86_BUILTIN_MASKMOVDQU:
25189 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25190 ? CODE_FOR_mmx_maskmovq
25191 : CODE_FOR_sse2_maskmovdqu);
25192 /* Note the arg order is different from the operand order. */
25193 arg1 = CALL_EXPR_ARG (exp, 0);
25194 arg2 = CALL_EXPR_ARG (exp, 1);
25195 arg0 = CALL_EXPR_ARG (exp, 2);
25196 op0 = expand_normal (arg0);
25197 op1 = expand_normal (arg1);
25198 op2 = expand_normal (arg2);
25199 mode0 = insn_data[icode].operand[0].mode;
25200 mode1 = insn_data[icode].operand[1].mode;
25201 mode2 = insn_data[icode].operand[2].mode;
/* The destination argument is an address; wrap it in a MEM of the
   data mode the insn expects. */
25203 op0 = force_reg (Pmode, op0);
25204 op0 = gen_rtx_MEM (mode1, op0);
25206 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25207 op0 = copy_to_mode_reg (mode0, op0);
25208 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25209 op1 = copy_to_mode_reg (mode1, op1);
25210 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25211 op2 = copy_to_mode_reg (mode2, op2);
25212 pat = GEN_FCN (icode) (op0, op1, op2);
25218 case IX86_BUILTIN_LDMXCSR:
/* Load the MXCSR control register from a stack slot. */
25219 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25220 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25221 emit_move_insn (target, op0);
25222 emit_insn (gen_sse_ldmxcsr (target));
25225 case IX86_BUILTIN_STMXCSR:
/* Store MXCSR to a stack slot and return it in a register. */
25226 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25227 emit_insn (gen_sse_stmxcsr (target));
25228 return copy_to_mode_reg (SImode, target);
25230 case IX86_BUILTIN_CLFLUSH:
25231 arg0 = CALL_EXPR_ARG (exp, 0);
25232 op0 = expand_normal (arg0);
25233 icode = CODE_FOR_sse2_clflush;
25234 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25235 op0 = copy_to_mode_reg (Pmode, op0);
25237 emit_insn (gen_sse2_clflush (op0));
25240 case IX86_BUILTIN_MONITOR:
25241 arg0 = CALL_EXPR_ARG (exp, 0)
25242 arg1 = CALL_EXPR_ARG (exp, 1);
25243 arg2 = CALL_EXPR_ARG (exp, 2);
25244 op0 = expand_normal (arg0);
25245 op1 = expand_normal (arg1);
25246 op2 = expand_normal (arg2);
25248 op0 = copy_to_mode_reg (Pmode, op0);
25250 op1 = copy_to_mode_reg (SImode, op1);
25252 op2 = copy_to_mode_reg (SImode, op2);
25253 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25256 case IX86_BUILTIN_MWAIT:
25257 arg0 = CALL_EXPR_ARG (exp, 0);
25258 arg1 = CALL_EXPR_ARG (exp, 1);
25259 op0 = expand_normal (arg0);
25260 op1 = expand_normal (arg1);
25262 op0 = copy_to_mode_reg (SImode, op0);
25264 op1 = copy_to_mode_reg (SImode, op1);
25265 emit_insn (gen_sse3_mwait (op0, op1));
/* MMX vector init/extract/set go through the dedicated wrappers above. */
25268 case IX86_BUILTIN_VEC_INIT_V2SI:
25269 case IX86_BUILTIN_VEC_INIT_V4HI:
25270 case IX86_BUILTIN_VEC_INIT_V8QI:
25271 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25273 case IX86_BUILTIN_VEC_EXT_V2DF:
25274 case IX86_BUILTIN_VEC_EXT_V2DI:
25275 case IX86_BUILTIN_VEC_EXT_V4SF:
25276 case IX86_BUILTIN_VEC_EXT_V4SI:
25277 case IX86_BUILTIN_VEC_EXT_V8HI:
25278 case IX86_BUILTIN_VEC_EXT_V2SI:
25279 case IX86_BUILTIN_VEC_EXT_V4HI:
25280 case IX86_BUILTIN_VEC_EXT_V16QI:
25281 return ix86_expand_vec_ext_builtin (exp, target);
25283 case IX86_BUILTIN_VEC_SET_V2DI:
25284 case IX86_BUILTIN_VEC_SET_V4SF:
25285 case IX86_BUILTIN_VEC_SET_V4SI:
25286 case IX86_BUILTIN_VEC_SET_V8HI:
25287 case IX86_BUILTIN_VEC_SET_V4HI:
25288 case IX86_BUILTIN_VEC_SET_V16QI:
25289 return ix86_expand_vec_set_builtin (exp);
25291 case IX86_BUILTIN_INFQ:
25292 case IX86_BUILTIN_HUGE_VALQ:
/* Materialize a TFmode infinity through the constant pool. */
25294 REAL_VALUE_TYPE inf;
25298 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25300 tmp = validize_mem (force_const_mem (mode, tmp));
25303 target = gen_reg_rtx (mode);
25305 emit_move_insn (target, tmp);
/* Regular builtins: scan the descriptor tables for FCODE and hand
   off to the matching generic expander. */
25313 for (i = 0, d = bdesc_special_args;
25314 i < ARRAY_SIZE (bdesc_special_args);
25316 if (d->code == fcode)
25317 return ix86_expand_special_args_builtin (d, exp, target);
25319 for (i = 0, d = bdesc_args;
25320 i < ARRAY_SIZE (bdesc_args);
25322 if (d->code == fcode)
25325 case IX86_BUILTIN_FABSQ:
25326 case IX86_BUILTIN_COPYSIGNQ:
25328 /* Emit a normal call if SSE2 isn't available. */
25329 return expand_call (exp, target, ignore);
25331 return ix86_expand_args_builtin (d, exp, target);
25334 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25335 if (d->code == fcode)
25336 return ix86_expand_sse_comi (d, exp, target);
25338 for (i = 0, d = bdesc_pcmpestr;
25339 i < ARRAY_SIZE (bdesc_pcmpestr);
25341 if (d->code == fcode)
25342 return ix86_expand_sse_pcmpestr (d, exp, target);
25344 for (i = 0, d = bdesc_pcmpistr;
25345 i < ARRAY_SIZE (bdesc_pcmpistr);
25347 if (d->code == fcode)
25348 return ix86_expand_sse_pcmpistr (d, exp, target);
25350 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25351 if (d->code == fcode)
25352 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25353 (enum multi_arg_type)d->flag,
/* Every valid FCODE must have matched one of the paths above. */
25356 gcc_unreachable ();
25359 /* Returns a function decl for a vectorized version of the builtin function
25360 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25361 if it is not available. */
/* NOTE(review): return type, the TYPE_IN parameter on the signature's
   second line, braces, `switch (fn)` header, `break;`s, and the final
   NULL_TREE return are elided in this excerpt. */
25364 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25367 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported. */
25370 if (TREE_CODE (type_out) != VECTOR_TYPE
25371 || TREE_CODE (type_in) != VECTOR_TYPE)
25374 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25375 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25376 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25377 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map scalar math builtins to SSE vector builtins when the element
   modes and lane counts line up. */
25381 case BUILT_IN_SQRT:
25382 if (out_mode == DFmode && out_n == 2
25383 && in_mode == DFmode && in_n == 2)
25384 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25387 case BUILT_IN_SQRTF:
25388 if (out_mode == SFmode && out_n == 4
25389 && in_mode == SFmode && in_n == 4)
25390 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25393 case BUILT_IN_LRINT:
25394 if (out_mode == SImode && out_n == 4
25395 && in_mode == DFmode && in_n == 2)
25396 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25399 case BUILT_IN_LRINTF:
25400 if (out_mode == SImode && out_n == 4
25401 && in_mode == SFmode && in_n == 4)
25402 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25409 /* Dispatch to a handler for a vectorization library. */
25410 if (ix86_veclib_handler)
25411 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25417 /* Handler for an SVML-style interface to
25418 a library with vectorized intrinsics. */
/* NOTE(review): the return type, local declarations (bname, n, in_n,
   the name buffer), braces, several early `return NULL_TREE;`s, the
   uppercase-conversion loop body, and the final return are elided. */
25421 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25424 tree fntype, new_fndecl, args;
25427 enum machine_mode el_mode, in_mode;
25430 /* The SVML is suitable for unsafe math only. */
25431 if (!flag_unsafe_math_optimizations)
25434 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25435 n = TYPE_VECTOR_SUBPARTS (type_out);
25436 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25437 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must match. */
25438 if (el_mode != in_mode
/* Double-precision functions: SVML provides 2-lane DFmode variants. */
25446 case BUILT_IN_LOG10:
25448 case BUILT_IN_TANH:
25450 case BUILT_IN_ATAN:
25451 case BUILT_IN_ATAN2:
25452 case BUILT_IN_ATANH:
25453 case BUILT_IN_CBRT:
25454 case BUILT_IN_SINH:
25456 case BUILT_IN_ASINH:
25457 case BUILT_IN_ASIN:
25458 case BUILT_IN_COSH:
25460 case BUILT_IN_ACOSH:
25461 case BUILT_IN_ACOS:
25462 if (el_mode != DFmode || n != 2)
/* Single-precision functions: SVML provides 4-lane SFmode variants. */
25466 case BUILT_IN_EXPF:
25467 case BUILT_IN_LOGF:
25468 case BUILT_IN_LOG10F:
25469 case BUILT_IN_POWF:
25470 case BUILT_IN_TANHF:
25471 case BUILT_IN_TANF:
25472 case BUILT_IN_ATANF:
25473 case BUILT_IN_ATAN2F:
25474 case BUILT_IN_ATANHF:
25475 case BUILT_IN_CBRTF:
25476 case BUILT_IN_SINHF:
25477 case BUILT_IN_SINF:
25478 case BUILT_IN_ASINHF:
25479 case BUILT_IN_ASINF:
25480 case BUILT_IN_COSHF:
25481 case BUILT_IN_COSF:
25482 case BUILT_IN_ACOSHF:
25483 case BUILT_IN_ACOSF:
25484 if (el_mode != SFmode || n != 4)
/* Derive the SVML symbol name from the scalar builtin's name.
   bname+10 skips the "__builtin_" prefix. */
25492 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
/* log/logf map to irregular SVML names (Ln), handled specially. */
25494 if (fn == BUILT_IN_LOGF)
25495 strcpy (name, "vmlsLn4");
25496 else if (fn == BUILT_IN_LOG)
25497 strcpy (name, "vmldLn2");
25500 sprintf (name, "vmls%s", bname+10);
25501 name[strlen (name)-1] = '4';
25504 sprintf (name, "vmld%s2", bname+10);
25506 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type. */
25510 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25511 args = TREE_CHAIN (args))
25515 fntype = build_function_type_list (type_out, type_in, NULL);
25517 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25519 /* Build a function declaration for the vectorized function. */
25520 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25521 TREE_PUBLIC (new_fndecl) = 1;
25522 DECL_EXTERNAL (new_fndecl) = 1;
25523 DECL_IS_NOVOPS (new_fndecl) = 1;
25524 TREE_READONLY (new_fndecl) = 1;
25529 /* Handler for an ACML-style interface to
25530 a library with vectorized intrinsics. */
/* NOTE(review): the return type, several local declarations, braces,
   early `return NULL_TREE;`s, and the final return are elided in this
   excerpt. */
25533 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* The ".." in the template is patched below with the function name;
   the prefix "__vr" plus mode letters matches ACML's naming scheme. */
25535 char name[20] = "__vr.._";
25536 tree fntype, new_fndecl, args;
25539 enum machine_mode el_mode, in_mode;
25542 /* The ACML is 64bits only and suitable for unsafe math only as
25543 it does not correctly support parts of IEEE with the required
25544 precision such as denormals. */
25546 || !flag_unsafe_math_optimizations)
25549 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25550 n = TYPE_VECTOR_SUBPARTS (type_out);
25551 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25552 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode / lane count must match. */
25553 if (el_mode != in_mode
/* Double-precision cases: require 2-lane DFmode vectors. */
25563 case BUILT_IN_LOG2:
25564 case BUILT_IN_LOG10:
25567 if (el_mode != DFmode
/* Single-precision cases: require 4-lane SFmode vectors. */
25572 case BUILT_IN_SINF:
25573 case BUILT_IN_COSF:
25574 case BUILT_IN_EXPF:
25575 case BUILT_IN_POWF:
25576 case BUILT_IN_LOGF:
25577 case BUILT_IN_LOG2F:
25578 case BUILT_IN_LOG10F:
25581 if (el_mode != SFmode
/* Splice the scalar builtin's name (skipping "__builtin_") into the
   "__vr.._" template at offset 7. */
25590 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25591 sprintf (name + 7, "%s", bname+10);
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type. */
25594 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25595 args = TREE_CHAIN (args))
25599 fntype = build_function_type_list (type_out, type_in, NULL);
25601 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25603 /* Build a function declaration for the vectorized function. */
25604 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25605 TREE_PUBLIC (new_fndecl) = 1;
25606 DECL_EXTERNAL (new_fndecl) = 1;
25607 DECL_IS_NOVOPS (new_fndecl) = 1;
25608 TREE_READONLY (new_fndecl) = 1;
25614 /* Returns a decl of a function that implements conversion of an integer vector
25615 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25616 side of the conversion.
25617 Return NULL_TREE if it is not available. */
/* NOTE(review): return type, braces, the outer `switch (code)` header
   with its FLOAT_EXPR arm, default cases, and NULL_TREE returns are
   elided in this excerpt. */
25620 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25622 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction: cvtdq2ps. */
25628 switch (TYPE_MODE (type))
25631 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction: cvttps2dq. */
25636 case FIX_TRUNC_EXPR:
25637 switch (TYPE_MODE (type))
25640 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25650 /* Returns a code for a target-specific builtin that implements
25651 reciprocal of the function, or NULL_TREE if not available. */
/* NOTE(review): return type, braces, switch headers, and NULL_TREE
   fallthrough returns are elided in this excerpt. */
25654 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25655 bool sqrt ATTRIBUTE_UNUSED)
/* The rsqrt approximation is only valid when fast-math style flags
   permit it and we are optimizing for speed. */
25657 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25658 && flag_finite_math_only && !flag_trapping_math
25659 && flag_unsafe_math_optimizations))
25663 /* Machine dependent builtins. */
25666 /* Vectorized version of sqrt to rsqrt conversion. */
25667 case IX86_BUILTIN_SQRTPS_NR:
25668 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25674 /* Normal builtins. */
25677 /* Sqrt to rsqrt conversion. */
25678 case BUILT_IN_SQRTF:
25679 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25686 /* Store OPERAND to the memory after reload is completed. This means
25687 that we can't easily use assign_stack_local. */
/* NOTE(review): return type, braces, several case labels of the inner
   mode switch, emit_insn wrappers, and the final `return result;` are
   elided in this excerpt. */
25689 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25693 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it. */
25694 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25696 result = gen_rtx_MEM (mode,
25697 gen_rtx_PLUS (Pmode,
25699 GEN_INT (-RED_ZONE_SIZE)));
25700 emit_move_insn (result, operand);
/* 64-bit without red zone: push the value (as DImode) onto the stack. */
25702 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25708 operand = gen_lowpart (DImode, operand);
25712 gen_rtx_SET (VOIDmode,
25713 gen_rtx_MEM (DImode,
25714 gen_rtx_PRE_DEC (DImode,
25715 stack_pointer_rtx)),
25719 gcc_unreachable ();
25721 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: push DImode as two SImode halves. */
25730 split_di (&operand, 1, operands, operands + 1);
25732 gen_rtx_SET (VOIDmode,
25733 gen_rtx_MEM (SImode,
25734 gen_rtx_PRE_DEC (Pmode,
25735 stack_pointer_rtx)),
25738 gen_rtx_SET (VOIDmode,
25739 gen_rtx_MEM (SImode,
25740 gen_rtx_PRE_DEC (Pmode,
25741 stack_pointer_rtx)),
25746 /* Store HImodes as SImodes. */
25747 operand = gen_lowpart (SImode, operand);
25751 gen_rtx_SET (VOIDmode,
25752 gen_rtx_MEM (GET_MODE (operand),
25753 gen_rtx_PRE_DEC (SImode,
25754 stack_pointer_rtx)),
25758 gcc_unreachable ();
25760 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25765 /* Free operand from the memory. */
/* NOTE(review): return type, braces, the `size` variable setup, and
   the GEN_INT(size) argument are elided in this excerpt. */
25767 ix86_free_from_memory (enum machine_mode mode)
/* Nothing to deallocate when the store went into the red zone. */
25769 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25773 if (mode == DImode || TARGET_64BIT)
25777 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25778 to pop or add instruction if registers are available. */
25779 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25780 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25785 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25786 QImode must go into class Q_REGS.
25787 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25788 movdf to do mem-to-mem moves through integer regs. */
/* NOTE(review): return type, braces, and most `return`-value lines of
   the individual branches are elided in this excerpt. */
25790 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25792 enum machine_mode mode = GET_MODE (x);
25794 /* We're only allowed to return a subclass of CLASS. Many of the
25795 following checks fail for NO_REGS, so eliminate that early. */
25796 if (regclass == NO_REGS)
25799 /* All classes can load zeros. */
25800 if (x == CONST0_RTX (mode))
25803 /* Force constants into memory if we are loading a (nonzero) constant into
25804 an MMX or SSE register. This is because there are no MMX/SSE instructions
25805 to load from a constant. */
25807 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25810 /* Prefer SSE regs only, if we can use them for math. */
25811 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25812 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25814 /* Floating-point constants need more complex checks. */
25815 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25817 /* General regs can load everything. */
25818 if (reg_class_subset_p (regclass, GENERAL_REGS))
25821 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25822 zero above. We only want to wind up preferring 80387 registers if
25823 we plan on doing computation with them. */
25825 && standard_80387_constant_p (x))
25827 /* Limit class to non-sse. */
25828 if (regclass == FLOAT_SSE_REGS)
25830 if (regclass == FP_TOP_SSE_REGS)
25832 if (regclass == FP_SECOND_SSE_REGS)
25833 return FP_SECOND_REG;
25834 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25841 /* Generally when we see PLUS here, it's the function invariant
25842 (plus soft-fp const_int). Which can only be computed into general
25844 if (GET_CODE (x) == PLUS)
25845 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25847 /* QImode constants are easy to load, but non-constant QImode data
25848 must go into Q_REGS. */
25849 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25851 if (reg_class_subset_p (regclass, Q_REGS))
25853 if (reg_class_subset_p (Q_REGS, regclass))
25861 /* Discourage putting floating-point values in SSE registers unless
25862 SSE math is being used, and likewise for the 387 registers. */
/* NOTE(review): return type, braces, and the fall-through
   `return regclass;` at the end are elided in this excerpt. */
25864 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25866 enum machine_mode mode = GET_MODE (x);
25868 /* Restrict the output reload class to the register bank that we are doing
25869 math on. If we would like not to return a subset of CLASS, reject this
25870 alternative: if reload cannot do this, it will still use its choice. */
25871 mode = GET_MODE (x);
25872 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25873 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25875 if (X87_FLOAT_MODE_P (mode))
/* Mixed x87+SSE classes narrow to their pure x87 counterparts. */
25877 if (regclass == FP_TOP_SSE_REGS)
25879 else if (regclass == FP_SECOND_SSE_REGS)
25880 return FP_SECOND_REG;
25882 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implements the TARGET_SECONDARY_RELOAD hook: return the class of an
   intermediate register needed to move X into/out of RCLASS, if any. */
25888 static enum reg_class
25889 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25890 enum machine_mode mode,
25891 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25893 /* QImode spills from non-QI registers require
25894 intermediate register on 32bit targets. */
25895 if (!in_p && mode == QImode && !TARGET_64BIT
25896 && (rclass == GENERAL_REGS
25897 || rclass == LEGACY_REGS
25898 || rclass == INDEX_REGS))
/* NOTE(review): lines between 25898 and 25907 (the `regno` setup and
   braces) are elided in this excerpt, as are the returns after 25910. */
25907 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25908 regno = true_regnum (x);
25910 /* Return Q_REGS if the operand is in memory. */
25918 /* If we are copying between general and FP registers, we need a memory
25919 location. The same is true for SSE and MMX registers.
25921 To optimize register_move_cost performance, allow inline variant.
25923 The macro can't work reliably when one of the CLASSES is class containing
25924 registers from multiple units (SSE, MMX, integer). We avoid this by never
25925 combining those units in single alternative in the machine description.
25926 Ensure that this constraint holds to avoid unexpected surprises.
25928 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25929 enforce these sanity checks. */
/* NOTE(review): return type, braces, and the `return true/false;` lines
   following each condition are elided in this excerpt. */
25932 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25933 enum machine_mode mode, int strict)
/* Sanity check: reject classes mixing register units (see comment above). */
25935 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25936 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25937 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25938 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25939 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25940 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25942 gcc_assert (!strict);
25946 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25949 /* ??? This is a lie. We do have moves between mmx/general, and for
25950 mmx/sse2. But by saying we need secondary memory we discourage the
25951 register allocator from using the mmx registers unless needed. */
25952 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25955 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25957 /* SSE1 doesn't have any direct moves from other classes. */
25961 /* If the target says that inter-unit moves are more expensive
25962 than moving through memory, then don't generate them. */
25963 if (!TARGET_INTER_UNIT_MOVES)
25966 /* Between SSE and general, we have moves no larger than word size. */
25967 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for SECONDARY_MEMORY_NEEDED; simply forwards to
   the inline variant above.  NOTE(review): return type and braces are
   elided in this excerpt. */
25975 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25976 enum machine_mode mode, int strict)
25978 return inline_secondary_memory_needed (class1, class2, mode, strict);
25981 /* Return true if the registers in CLASS cannot represent the change from
25982 modes FROM to TO. */
/* NOTE(review): return type, braces, the equal-size early return, and
   the `return true/false;` lines after each check are elided. */
25985 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25986 enum reg_class regclass)
25991 /* x87 registers can't do subreg at all, as all values are reformatted
25992 to extended precision. */
25993 if (MAYBE_FLOAT_CLASS_P (regclass))
25996 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25998 /* Vector registers do not support QI or HImode loads. If we don't
25999 disallow a change to these modes, reload will assume it's ok to
26000 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26001 the vec_dupv4hi pattern. */
26002 if (GET_MODE_SIZE (from) < 4)
26005 /* Vector registers do not support subreg with nonzero offsets, which
26006 are otherwise valid for integer registers. Since we can't see
26007 whether we have a nonzero offset from here, prohibit all
26008 nonparadoxical subregs changing size. */
26009 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26016 /* Return the cost of moving data of mode M between a
26017 register and memory. A value of 2 is the default; this cost is
26018 relative to those in `REGISTER_MOVE_COST'.
26020 This function is used extensively by register_move_cost that is used to
26021 build tables at startup. Make it inline in this case.
26022 When IN is 2, return maximum of in and out move cost.
26024 If moving between registers and memory is more expensive than
26025 between two registers, you should define this macro to express the
26028 Model also increased moving costs of QImode registers in non
26032 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): the IN parameter line, return type, braces, the `index`
   computations for each class, and several switch labels are elided in
   this excerpt. */
26036 if (FLOAT_CLASS_P (regclass))
/* IN == 2 means "max of load and store cost" throughout. */
26054 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26055 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26057 if (SSE_CLASS_P (regclass))
26060 switch (GET_MODE_SIZE (mode))
26075 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26076 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26078 if (MMX_CLASS_P (regclass))
26081 switch (GET_MODE_SIZE (mode))
26093 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26094 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, keyed on operand size. */
26096 switch (GET_MODE_SIZE (mode))
26099 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26102 return ix86_cost->int_store[0];
/* Partial-register-stall targets prefer movzbl for byte loads. */
26103 if (TARGET_PARTIAL_REG_DEPENDENCY
26104 && optimize_function_for_speed_p (cfun))
26105 cost = ix86_cost->movzbl_load;
26107 cost = ix86_cost->int_load[0];
26109 return MAX (cost, ix86_cost->int_store[0]);
26115 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26117 return ix86_cost->movzbl_load;
26119 return ix86_cost->int_store[0] + 4;
26124 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26125 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26127 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26128 if (mode == TFmode)
26131 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26133 cost = ix86_cost->int_load[2];
26135 cost = ix86_cost->int_store[2];
/* Scale by the number of word-sized pieces the value occupies. */
26136 return (cost * (((int) GET_MODE_SIZE (mode)
26137 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for MEMORY_MOVE_COST; forwards to the inline
   variant above.  NOTE(review): return type and braces are elided. */
26142 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26144 return inline_memory_move_cost (mode, regclass, in);
26148 /* Return the cost of moving data from a register in class CLASS1 to
26149 one in class CLASS2.
26151 It is not required that the cost always equal 2 when FROM is the same as TO;
26152 on some machines it is expensive to move between registers if they are not
26153 general registers. */
/* NOTE(review): return type, braces, the `cost` declaration and base
   value, the `return cost;` after the memory path, and the final
   default return are elided in this excerpt. */
26156 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26157 enum reg_class class2)
26159 /* In case we require secondary memory, compute cost of the store followed
26160 by load. In order to avoid bad register allocation choices, we need
26161 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26163 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument 2 asks for max(load, store) in each direction. */
26167 cost += inline_memory_move_cost (mode, class1, 2);
26168 cost += inline_memory_move_cost (mode, class2, 2);
26170 /* In case of copying from general_purpose_register we may emit multiple
26171 stores followed by single load causing memory size mismatch stall.
26172 Count this as arbitrarily high cost of 20. */
26173 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26176 /* In the case of FP/MMX moves, the registers actually overlap, and we
26177 have to switch modes in order to treat them differently. */
26178 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26179 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26185 /* Moves between SSE/MMX and integer unit are expensive. */
26186 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26187 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26189 /* ??? By keeping returned value relatively high, we limit the number
26190 of moves between integer and MMX/SSE registers for all targets.
26191 Additionally, high value prevents problem with x86_modes_tieable_p(),
26192 where integer modes in MMX/SSE registers are not tieable
26193 because of missing QImode and HImode moves to, from or between
26194 MMX/SSE registers. */
26195 return MAX (8, ix86_cost->mmxsse_to_integer);
26197 if (MAYBE_FLOAT_CLASS_P (class1))
26198 return ix86_cost->fp_move;
26199 if (MAYBE_SSE_CLASS_P (class1))
26200 return ix86_cost->sse_move;
26201 if (MAYBE_MMX_CLASS_P (class1))
26202 return ix86_cost->mmx_move;
26206 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* NOTE(review): return type, braces, and the `return 1;`/`return 0;`
   lines that terminate several branches are elided in this excerpt. */
26209 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26211 /* Flags and only flags can only hold CCmode values. */
26212 if (CC_REGNO_P (regno))
26213 return GET_MODE_CLASS (mode) == MODE_CC;
26214 if (GET_MODE_CLASS (mode) == MODE_CC
26215 || GET_MODE_CLASS (mode) == MODE_RANDOM
26216 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26218 if (FP_REGNO_P (regno))
26219 return VALID_FP_MODE_P (mode);
26220 if (SSE_REGNO_P (regno))
26222 /* We implement the move patterns for all vector modes into and
26223 out of SSE registers, even when no operation instructions
26224 are available. OImode move is available only when AVX is
26226 return ((TARGET_AVX && mode == OImode)
26227 || VALID_AVX256_REG_MODE (mode)
26228 || VALID_SSE_REG_MODE (mode)
26229 || VALID_SSE2_REG_MODE (mode)
26230 || VALID_MMX_REG_MODE (mode)
26231 || VALID_MMX_REG_MODE_3DNOW (mode));
26233 if (MMX_REGNO_P (regno))
26235 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26236 so if the register is available at all, then we can move data of
26237 the given mode into or out of it. */
26238 return (VALID_MMX_REG_MODE (mode)
26239 || VALID_MMX_REG_MODE_3DNOW (mode));
26242 if (mode == QImode)
26244 /* Take care for QImode values - they can be in non-QI regs,
26245 but then they do cause partial register stalls. */
/* a/b/c/d (regno <= BX_REG) always have byte subregisters; in 64-bit
   mode every GPR does. */
26246 if (regno <= BX_REG || TARGET_64BIT)
26248 if (!TARGET_PARTIAL_REG_STALL)
26250 return reload_in_progress || reload_completed;
26252 /* We handle both integer and floats in the general purpose registers. */
26253 else if (VALID_INT_MODE_P (mode))
26255 else if (VALID_FP_MODE_P (mode))
26257 else if (VALID_DFP_MODE_P (mode))
26259 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26260 on to use that value in smaller contexts, this can easily force a
26261 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26262 supporting DImode, allow it. */
26263 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26269 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26270 tieable integer mode. */
/* NOTE(review): return type, braces, the `switch (mode)` header and
   its case labels (HImode/QImode vs SImode/DImode groups), and the
   default return are elided in this excerpt. */
26273 ix86_tieable_integer_mode_p (enum machine_mode mode)
26282 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26285 return TARGET_64BIT;
26292 /* Return true if MODE1 is accessible in a register that can hold MODE2
26293 without copying. That is, all register classes that can hold MODE2
26294 can also hold MODE1. */
/* NOTE(review): return type, braces, the `return true;` lines after
   the first two conditions, and the final `return false;` are elided
   in this excerpt. */
26297 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26299 if (mode1 == mode2)
26302 if (ix86_tieable_integer_mode_p (mode1)
26303 && ix86_tieable_integer_mode_p (mode2))
26306 /* MODE2 being XFmode implies fp stack or general regs, which means we
26307 can tie any smaller floating point modes to it. Note that we do not
26308 tie this with TFmode. */
26309 if (mode2 == XFmode)
26310 return mode1 == SFmode || mode1 == DFmode;
26312 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26313 that we can tie it with SFmode. */
26314 if (mode2 == DFmode)
26315 return mode1 == SFmode;
26317 /* If MODE2 is only appropriate for an SSE register, then tie with
26318 any other mode acceptable to SSE registers. */
26319 if (GET_MODE_SIZE (mode2) == 16
26320 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26321 return (GET_MODE_SIZE (mode1) == 16
26322 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26324 /* If MODE2 is appropriate for an MMX register, then tie
26325 with any other mode acceptable to MMX registers. */
26326 if (GET_MODE_SIZE (mode2) == 8
26327 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26328 return (GET_MODE_SIZE (mode1) == 8
26329 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26334 /* Compute a (partial) cost for rtx X. Return true if the complete
26335 cost has been computed, and false if subexpressions should be
26336 scanned. In either case, *TOTAL contains the cost result. */
26339 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26341 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26342 enum machine_mode mode = GET_MODE (x);
26343 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26351 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26353 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26355 else if (flag_pic && SYMBOLIC_CONST (x)
26357 || (!GET_CODE (x) != LABEL_REF
26358 && (GET_CODE (x) != SYMBOL_REF
26359 || !SYMBOL_REF_LOCAL_P (x)))))
26366 if (mode == VOIDmode)
26369 switch (standard_80387_constant_p (x))
26374 default: /* Other constants */
26379 /* Start with (MEM (SYMBOL_REF)), since that's where
26380 it'll probably end up. Add a penalty for size. */
26381 *total = (COSTS_N_INSNS (1)
26382 + (flag_pic != 0 && !TARGET_64BIT)
26383 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26389 /* The zero extensions is often completely free on x86_64, so make
26390 it as cheap as possible. */
26391 if (TARGET_64BIT && mode == DImode
26392 && GET_MODE (XEXP (x, 0)) == SImode)
26394 else if (TARGET_ZERO_EXTEND_WITH_AND)
26395 *total = cost->add;
26397 *total = cost->movzx;
26401 *total = cost->movsx;
26405 if (CONST_INT_P (XEXP (x, 1))
26406 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26408 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26411 *total = cost->add;
26414 if ((value == 2 || value == 3)
26415 && cost->lea <= cost->shift_const)
26417 *total = cost->lea;
26427 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26429 if (CONST_INT_P (XEXP (x, 1)))
26431 if (INTVAL (XEXP (x, 1)) > 32)
26432 *total = cost->shift_const + COSTS_N_INSNS (2);
26434 *total = cost->shift_const * 2;
26438 if (GET_CODE (XEXP (x, 1)) == AND)
26439 *total = cost->shift_var * 2;
26441 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26446 if (CONST_INT_P (XEXP (x, 1)))
26447 *total = cost->shift_const;
26449 *total = cost->shift_var;
26454 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26456 /* ??? SSE scalar cost should be used here. */
26457 *total = cost->fmul;
26460 else if (X87_FLOAT_MODE_P (mode))
26462 *total = cost->fmul;
26465 else if (FLOAT_MODE_P (mode))
26467 /* ??? SSE vector cost should be used here. */
26468 *total = cost->fmul;
26473 rtx op0 = XEXP (x, 0);
26474 rtx op1 = XEXP (x, 1);
26476 if (CONST_INT_P (XEXP (x, 1)))
26478 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26479 for (nbits = 0; value != 0; value &= value - 1)
26483 /* This is arbitrary. */
26486 /* Compute costs correctly for widening multiplication. */
26487 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26488 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26489 == GET_MODE_SIZE (mode))
26491 int is_mulwiden = 0;
26492 enum machine_mode inner_mode = GET_MODE (op0);
26494 if (GET_CODE (op0) == GET_CODE (op1))
26495 is_mulwiden = 1, op1 = XEXP (op1, 0);
26496 else if (CONST_INT_P (op1))
26498 if (GET_CODE (op0) == SIGN_EXTEND)
26499 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26502 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26506 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26509 *total = (cost->mult_init[MODE_INDEX (mode)]
26510 + nbits * cost->mult_bit
26511 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26520 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26521 /* ??? SSE cost should be used here. */
26522 *total = cost->fdiv;
26523 else if (X87_FLOAT_MODE_P (mode))
26524 *total = cost->fdiv;
26525 else if (FLOAT_MODE_P (mode))
26526 /* ??? SSE vector cost should be used here. */
26527 *total = cost->fdiv;
26529 *total = cost->divide[MODE_INDEX (mode)];
26533 if (GET_MODE_CLASS (mode) == MODE_INT
26534 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26536 if (GET_CODE (XEXP (x, 0)) == PLUS
26537 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26538 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26539 && CONSTANT_P (XEXP (x, 1)))
26541 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26542 if (val == 2 || val == 4 || val == 8)
26544 *total = cost->lea;
26545 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26546 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26547 outer_code, speed);
26548 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26552 else if (GET_CODE (XEXP (x, 0)) == MULT
26553 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26555 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26556 if (val == 2 || val == 4 || val == 8)
26558 *total = cost->lea;
26559 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26560 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26564 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26566 *total = cost->lea;
26567 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26568 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26569 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26576 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26578 /* ??? SSE cost should be used here. */
26579 *total = cost->fadd;
26582 else if (X87_FLOAT_MODE_P (mode))
26584 *total = cost->fadd;
26587 else if (FLOAT_MODE_P (mode))
26589 /* ??? SSE vector cost should be used here. */
26590 *total = cost->fadd;
26598 if (!TARGET_64BIT && mode == DImode)
26600 *total = (cost->add * 2
26601 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26602 << (GET_MODE (XEXP (x, 0)) != DImode))
26603 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26604 << (GET_MODE (XEXP (x, 1)) != DImode)));
26610 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26612 /* ??? SSE cost should be used here. */
26613 *total = cost->fchs;
26616 else if (X87_FLOAT_MODE_P (mode))
26618 *total = cost->fchs;
26621 else if (FLOAT_MODE_P (mode))
26623 /* ??? SSE vector cost should be used here. */
26624 *total = cost->fchs;
26630 if (!TARGET_64BIT && mode == DImode)
26631 *total = cost->add * 2;
26633 *total = cost->add;
26637 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26638 && XEXP (XEXP (x, 0), 1) == const1_rtx
26639 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26640 && XEXP (x, 1) == const0_rtx)
26642 /* This kind of construct is implemented using test[bwl].
26643 Treat it as if we had an AND. */
26644 *total = (cost->add
26645 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26646 + rtx_cost (const1_rtx, outer_code, speed));
26652 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26657 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26658 /* ??? SSE cost should be used here. */
26659 *total = cost->fabs;
26660 else if (X87_FLOAT_MODE_P (mode))
26661 *total = cost->fabs;
26662 else if (FLOAT_MODE_P (mode))
26663 /* ??? SSE vector cost should be used here. */
26664 *total = cost->fabs;
26668 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26669 /* ??? SSE cost should be used here. */
26670 *total = cost->fsqrt;
26671 else if (X87_FLOAT_MODE_P (mode))
26672 *total = cost->fsqrt;
26673 else if (FLOAT_MODE_P (mode))
26674 /* ??? SSE vector cost should be used here. */
26675 *total = cost->fsqrt;
26679 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
26690 static int current_machopic_label_num;
26692 /* Given a symbol name and its associated stub, write out the
26693 definition of the stub. */
/* NOTE(review): elided listing -- the MACHOPIC_PURE/#if conditionals that
   select between the PIC and non-PIC stub bodies below are missing lines.  */
26696 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26698 unsigned int length;
26699 char *binder_name, *symbol_name, lazy_ptr_name[32];
26700 int label = ++current_machopic_label_num;
26702 /* For 64-bit we shouldn't get here. */
26703 gcc_assert (!TARGET_64BIT);
26705 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26706 symb = (*targetm.strip_name_encoding) (symb);
26708 length = strlen (stub);
26709 binder_name = XALLOCAVEC (char, length + 32);
26710 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26712 length = strlen (symb);
26713 symbol_name = XALLOCAVEC (char, length + 32);
26714 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26716 sprintf (lazy_ptr_name, "L%d$lz", label);
26719 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26721 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26723 fprintf (file, "%s:\n", stub);
26724 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC variant: materialize PC in %eax, load the lazy pointer
   PC-relative, then jump through it.  */
26728 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26729 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26730 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC variant: jump indirectly through the lazy pointer.  */
26733 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy-pointer address and enter dyld's helper.  */
26735 fprintf (file, "%s:\n", binder_name);
26739 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26740 fprintf (file, "\tpushl\t%%eax\n");
26743 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26745 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer initially resolves to the binder.  */
26747 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26748 fprintf (file, "%s:\n", lazy_ptr_name);
26749 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26750 fprintf (file, "\t.long %s\n", binder_name);
/* End-of-file hook for Darwin targets: delegate to the generic
   Darwin epilogue output.  */
26754 darwin_x86_file_end (void)
26756 darwin_file_end ();
26759 #endif /* TARGET_MACHO */
26761 /* Order the registers for register allocator. */
/* Preference order: call-clobbered GPRs, call-saved GPRs, then x87
   before/after SSE depending on TARGET_SSE_MATH, then MMX.  */
26764 x86_order_regs_for_local_alloc (void)
26769 /* First allocate the local general purpose registers. */
26770 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26771 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26772 reg_alloc_order [pos++] = i;
26774 /* Global general purpose registers. */
26775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26776 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26777 reg_alloc_order [pos++] = i;
26779 /* x87 registers come first in case we are doing FP math
26781 if (!TARGET_SSE_MATH)
26782 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26783 reg_alloc_order [pos++] = i;
26785 /* SSE registers. */
26786 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26787 reg_alloc_order [pos++] = i;
26788 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26789 reg_alloc_order [pos++] = i;
26791 /* x87 registers. */
26792 if (TARGET_SSE_MATH)
26793 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26794 reg_alloc_order [pos++] = i;
26796 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26797 reg_alloc_order [pos++] = i;
26799 /* Initialize the rest of array as we do not allocate some registers
26801 while (pos < FIRST_PSEUDO_REGISTER)
26802 reg_alloc_order [pos++] = 0;
26805 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26806 struct attribute_spec.handler. */
/* Rejects the attribute (setting *no_add_attrs) when applied to a
   non-function, when not targeting 64-bit, or when the opposite ABI
   attribute is already present.  */
26808 ix86_handle_abi_attribute (tree *node, tree name,
26809 tree args ATTRIBUTE_UNUSED,
26810 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26812 if (TREE_CODE (*node) != FUNCTION_TYPE
26813 && TREE_CODE (*node) != METHOD_TYPE
26814 && TREE_CODE (*node) != FIELD_DECL
26815 && TREE_CODE (*node) != TYPE_DECL)
26817 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26818 IDENTIFIER_POINTER (name));
26819 *no_add_attrs = true;
/* NOTE(review): the TARGET_64BIT check guarding this warning is an
   elided line in this listing.  */
26824 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26825 IDENTIFIER_POINTER (name));
26826 *no_add_attrs = true;
26830 /* Can combine regparm with all attributes but fastcall. */
26831 if (is_attribute_p ("ms_abi", name))
26833 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26835 error ("ms_abi and sysv_abi attributes are not compatible");
26840 else if (is_attribute_p ("sysv_abi", name))
26842 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26844 error ("ms_abi and sysv_abi attributes are not compatible");
26853 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26854 struct attribute_spec.handler. */
/* Only valid on RECORD_TYPE/UNION_TYPE (or a decl thereof); also
   rejected when the opposite layout attribute is already present.  */
26856 ix86_handle_struct_attribute (tree *node, tree name,
26857 tree args ATTRIBUTE_UNUSED,
26858 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26861 if (DECL_P (*node))
26863 if (TREE_CODE (*node) == TYPE_DECL)
26864 type = &TREE_TYPE (*node);
26869 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26870 || TREE_CODE (*type) == UNION_TYPE)))
26872 warning (OPT_Wattributes, "%qs attribute ignored",
26873 IDENTIFIER_POINTER (name));
26874 *no_add_attrs = true;
26877 else if ((is_attribute_p ("ms_struct", name)
26878 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26879 || ((is_attribute_p ("gcc_struct", name)
26880 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26882 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26883 IDENTIFIER_POINTER (name));
26884 *no_add_attrs = true;
/* Return true if RECORD_TYPE should use MS bitfield layout rules:
   either the target default says so (and "gcc_struct" does not
   override it), or the record carries an explicit "ms_struct".  */
26891 ix86_ms_bitfield_layout_p (const_tree record_type)
26893 return (TARGET_MS_BITFIELD_LAYOUT &&
26894 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26895 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26898 /* Returns an expression indicating where the this parameter is
26899 located on entry to the FUNCTION. */
/* 64-bit: `this' is in the first (or second, if the return value is
   passed by hidden reference) integer parameter register of the
   function's ABI.  32-bit: in a regparm/fastcall register when
   available, otherwise on the stack.  */
26902 x86_this_parameter (tree function)
26904 tree type = TREE_TYPE (function);
/* AGGR: nonzero when the return value is an aggregate returned in
   memory, which shifts `this' to the next parameter slot.  */
26905 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26910 const int *parm_regs;
26912 if (ix86_function_type_abi (type) == MS_ABI)
26913 parm_regs = x86_64_ms_abi_int_parameter_registers;
26915 parm_regs = x86_64_int_parameter_registers;
26916 return gen_rtx_REG (DImode, parm_regs[aggr]);
26919 nregs = ix86_function_regparm (type, function);
26921 if (nregs > 0 && !stdarg_p (type))
26925 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26926 regno = aggr ? DX_REG : CX_REG;
/* NOTE(review): elided listing -- the regparm register selection and
   the branch leading to the stack fallback below are missing lines.  */
26934 return gen_rtx_MEM (SImode,
26935 plus_constant (stack_pointer_rtx, 4));
26938 return gen_rtx_REG (SImode, regno);
/* Default: `this' is the first stack argument (skip hidden return
   pointer when AGGR).  */
26941 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26944 /* Determine whether x86_output_mi_thunk can succeed. */
26947 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26948 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26949 HOST_WIDE_INT vcall_offset, const_tree function)
26951 /* 64-bit can handle anything. */
26955 /* For 32-bit, everything's fine if we have one free register. */
26956 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26959 /* Need a free register for vcall_offset. */
/* NOTE(review): the vcall_offset test and the return statements are
   elided lines in this listing.  */
26963 /* Need a free register for GOT references. */
26964 if (flag_pic && !(*targetm.binds_local_p) (function))
26967 /* Otherwise ok. */
26971 /* Output the assembler code for a thunk function. THUNK_DECL is the
26972 declaration for the thunk function itself, FUNCTION is the decl for
26973 the target function. DELTA is an immediate constant offset to be
26974 added to THIS. If VCALL_OFFSET is nonzero, the word at
26975 *(*this + vcall_offset) should be added to THIS. */
/* NOTE(review): elided listing -- TARGET_64BIT / TARGET_MACHO
   conditionals and several braces are missing lines throughout.  */
26978 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26979 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26980 HOST_WIDE_INT vcall_offset, tree function)
26983 rtx this_param = x86_this_parameter (function);
26986 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26987 pull it in now and let DELTA benefit. */
26988 if (REG_P (this_param))
26989 this_reg = this_param;
26990 else if (vcall_offset)
26992 /* Put the this parameter into %eax. */
26993 xops[0] = this_param;
26994 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26995 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26998 this_reg = NULL_RTX;
27000 /* Adjust the this parameter by a fixed constant. */
27003 xops[0] = GEN_INT (delta);
27004 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: DELTA may not fit an immediate; stage it through %r10.  */
27007 if (!x86_64_general_operand (xops[0], DImode))
27009 tmp = gen_rtx_REG (DImode, R10_REG);
27011 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27013 xops[1] = this_param;
27015 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27018 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27021 /* Adjust the this parameter by a value stored in the vtable. */
27025 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit: pick a scratch register not used for argument passing.  */
27028 int tmp_regno = CX_REG;
27029 if (lookup_attribute ("fastcall",
27030 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27031 tmp_regno = AX_REG;
27032 tmp = gen_rtx_REG (SImode, tmp_regno);
27035 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27037 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27039 /* Adjust the this parameter. */
27040 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27041 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27043 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27044 xops[0] = GEN_INT (vcall_offset);
27046 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27047 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27049 xops[1] = this_reg;
27050 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27053 /* If necessary, drop THIS back to its stack slot. */
27054 if (this_reg && this_reg != this_param)
27056 xops[0] = this_reg;
27057 xops[1] = this_param;
27058 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27061 xops[0] = XEXP (DECL_RTL (function), 0);
27064 if (!flag_pic || (*targetm.binds_local_p) (function))
27065 output_asm_insn ("jmp\t%P0", xops);
27066 /* All thunks should be in the same object as their target,
27067 and thus binds_local_p should be true. */
27068 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27069 gcc_unreachable ();
27072 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27073 tmp = gen_rtx_CONST (Pmode, tmp);
27074 tmp = gen_rtx_MEM (QImode, tmp);
27076 output_asm_insn ("jmp\t%A0", xops);
27081 if (!flag_pic || (*targetm.binds_local_p) (function))
27082 output_asm_insn ("jmp\t%P0", xops);
/* Mach-O PIC: jump through the symbol's stub.  */
27087 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27088 tmp = (gen_rtx_SYMBOL_REF
27090 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27091 tmp = gen_rtx_MEM (QImode, tmp);
27093 output_asm_insn ("jmp\t%0", xops);
27096 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: load GOT pointer into %ecx and jump through the
   target's GOT entry.  */
27098 tmp = gen_rtx_REG (SImode, CX_REG);
27099 output_set_got (tmp, NULL_RTX);
27102 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27103 output_asm_insn ("jmp\t{*}%1", xops);
/* Target hook: emit the start-of-file assembler boilerplate
   (version/fltused directives, Intel syntax selection).  */
27109 x86_file_start (void)
27111 default_file_start ();
27113 darwin_file_start ();
27115 if (X86_FILE_START_VERSION_DIRECTIVE)
27116 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27117 if (X86_FILE_START_FLTUSED)
27118 fputs ("\t.global\t__fltused\n", asm_out_file);
27119 if (ix86_asm_dialect == ASM_INTEL)
27120 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Target hook: cap the alignment of FIELD at 32 bits for scalar
   integer/double types on 32-bit targets without -malign-double,
   matching the traditional i386 ABI.  */
27124 x86_field_alignment (tree field, int computed)
27126 enum machine_mode mode;
27127 tree type = TREE_TYPE (field);
27129 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27131 mode = TYPE_MODE (strip_array_types (type));
27132 if (mode == DFmode || mode == DCmode
27133 || GET_MODE_CLASS (mode) == MODE_INT
27134 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27135 return MIN (32, computed);
27139 /* Output assembler code to FILE to increment profiler label # LABELNO
27140 for profiling a function entry. */
/* Emits the mcount call sequence: 64-bit, 32-bit PIC (via GOT) and
   32-bit non-PIC variants; the #ifdef/else lines separating them are
   elided in this listing.  */
27142 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27146 #ifndef NO_PROFILE_COUNTERS
27147 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
27150 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27151 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
27153 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27157 #ifndef NO_PROFILE_COUNTERS
27158 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
27159 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
27161 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
27165 #ifndef NO_PROFILE_COUNTERS
27166 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
27167 PROFILE_COUNT_REGISTER);
27169 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
27173 /* We don't have exact information about the insn sizes, but we may assume
27174 quite safely that we are informed about all 1 byte insns and memory
27175 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded
   size; the actual return statements for each case are elided lines
   in this listing.  */
27179 min_insn_size (rtx insn)
27183 if (!INSN_P (insn) || !active_insn_p (insn))
27186 /* Discard alignments we've emit and jump instructions. */
27187 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27188 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27191 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
27192 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
27195 /* Important case - calls are always 5 bytes.
27196 It is common to have many calls in the row. */
27198 && symbolic_reference_mentioned_p (PATTERN (insn))
27199 && !SIBLING_CALL_P (insn))
27201 if (get_attr_length (insn) <= 1)
27204 /* For normal instructions we may rely on the sizes of addresses
27205 and the presence of symbol to require 4 bytes of encoding.
27206 This is not the case for jumps where references are PC relative. */
27207 if (!JUMP_P (insn))
27209 l = get_attr_length_address (insn);
27210 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27219 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Pass body: slide a window [START, INSN] over the insn stream counting
   jumps and estimated bytes; pad before a 4th jump that would land in
   the same 16-byte window.  */
27223 ix86_avoid_jump_misspredicts (void)
27225 rtx insn, start = get_insns ();
27226 int nbytes = 0, njumps = 0;
27229 /* Look for all minimal intervals of instructions containing 4 jumps.
27230 The intervals are bounded by START and INSN. NBYTES is the total
27231 size of instructions in the interval including INSN and not including
27232 START. When the NBYTES is smaller than 16 bytes, it is possible
27233 that the end of START and INSN ends up in the same 16byte page.
27235 The smallest offset in the page INSN can start is the case where START
27236 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27237 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
27239 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27242 nbytes += min_insn_size (insn);
27244 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27245 INSN_UID (insn), min_insn_size (insn));
/* Count real jumps only; dispatch tables are data, not jumps.  */
27247 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27248 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the front until it holds at most 3 jumps.  */
27256 start = NEXT_INSN (start);
27257 if ((JUMP_P (start)
27258 && GET_CODE (PATTERN (start)) != ADDR_VEC
27259 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27261 njumps--, isjump = 1;
27264 nbytes -= min_insn_size (start);
27266 gcc_assert (njumps >= 0);
27268 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27269 INSN_UID (start), INSN_UID (insn), nbytes);
27271 if (njumps == 3 && isjump && nbytes < 16)
27273 int padsize = 15 - nbytes + min_insn_size (insn);
27276 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27277 INSN_UID (insn), padsize);
27278 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27283 /* AMD Athlon works faster
27284 when RET is not destination of conditional jump or directly preceded
27285 by other jump instruction. We avoid the penalty by inserting NOP just
27286 before the RET instructions in such cases. */
27288 ix86_pad_returns (void)
27293 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27295 basic_block bb = e->src;
27296 rtx ret = BB_END (bb);
27298 bool replace = false;
27300 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27301 || optimize_bb_for_size_p (bb))
/* Walk back to the nearest active insn or label before the RET.  */
27303 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27304 if (active_insn_p (prev) || LABEL_P (prev))
27306 if (prev && LABEL_P (prev))
/* RET reached via a label: pad if any predecessor jumps here.  */
27311 FOR_EACH_EDGE (e, ei, bb->preds)
27312 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27313 && !(e->flags & EDGE_FALLTHRU))
27318 prev = prev_active_insn (ret);
27320 && ((JUMP_P (prev) && any_condjump_p (prev))
27323 /* Empty functions get branch mispredict even when the jump destination
27324 is not visible to us. */
27325 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
/* Replace the plain return with the long (padded) form.  */
27330 emit_insn_before (gen_return_internal_long (), ret);
27336 /* Implement machine specific optimizations. We implement padding of returns
27337 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* NOTE(review): the function header line itself (presumably
   ix86_reorg) is elided in this listing.  */
27341 if (TARGET_PAD_RETURNS && optimize
27342 && optimize_function_for_speed_p (cfun))
27343 ix86_pad_returns ();
27344 if (TARGET_FOUR_JUMP_LIMIT && optimize
27345 && optimize_function_for_speed_p (cfun))
27346 ix86_avoid_jump_misspredicts ();
27349 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached operands of INSN for a GPR above %ebx (QImode
   access to such registers needs a REX prefix in 64-bit mode).  */
27352 x86_extended_QIreg_mentioned_p (rtx insn)
27355 extract_insn_cached (insn);
27356 for (i = 0; i < recog_data.n_operands; i++)
27357 if (REG_P (recog_data.operand[i])
27358 && REGNO (recog_data.operand[i]) > BX_REG)
27363 /* Return nonzero when P points to register encoded via REX prefix.
27364 Called via for_each_rtx. */
27366 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27368 unsigned int regno;
/* NOTE(review): the REG_P guard preceding this is an elided line.  */
27371 regno = REGNO (*p);
27372 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27375 /* Return true when INSN mentions register that must be encoded using REX
/* Walks the pattern (or bare rtx) with for_each_rtx, using
   extended_reg_mentioned_1 as the predicate.  */
27378 x86_extended_reg_mentioned_p (rtx insn)
27380 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27381 extended_reg_mentioned_1, NULL);
27384 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27385 optabs would emit if we didn't have TFmode patterns. */
/* Non-negative inputs convert directly; negative (high-bit-set) inputs
   are halved with the low bit folded in, converted, then doubled.  */
27388 x86_emit_floatuns (rtx operands[2])
27390 rtx neglab, donelab, i0, i1, f0, in, out;
27391 enum machine_mode mode, inmode;
27393 inmode = GET_MODE (operands[1]);
27394 gcc_assert (inmode == SImode || inmode == DImode);
27397 in = force_reg (inmode, operands[1]);
27398 mode = GET_MODE (out);
27399 neglab = gen_label_rtx ();
27400 donelab = gen_label_rtx ();
27401 f0 = gen_reg_rtx (mode);
27403 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
/* Fast path: value is non-negative, a signed conversion is correct.  */
27405 expand_float (out, in, 0);
27407 emit_jump_insn (gen_jump (donelab));
27410 emit_label (neglab);
/* Slow path: (in >> 1) | (in & 1) halves the value while preserving
   rounding; converting and doubling recovers the unsigned result.  */
27412 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27414 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27416 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27418 expand_float (f0, i0, 0);
27420 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27422 emit_label (donelab);
27425 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27426 with all elements equal to VAR. Return true if successful. */
/* NOTE(review): elided listing -- the mode switch, its case labels and
   the per-case return statements are missing lines; only the expansion
   bodies are visible.  */
27429 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27430 rtx target, rtx val)
27432 enum machine_mode hmode, smode, wsmode, wvmode;
/* Simple case: a single VEC_DUPLICATE insn does the broadcast.  */
27447 val = force_reg (GET_MODE_INNER (mode), val);
27448 x = gen_rtx_VEC_DUPLICATE (mode, val);
27449 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27455 if (TARGET_SSE || TARGET_3DNOW_A)
27457 val = gen_lowpart (SImode, val);
27458 x = gen_rtx_TRUNCATE (HImode, val);
27459 x = gen_rtx_VEC_DUPLICATE (mode, x);
27460 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27482 /* Extend HImode to SImode using a paradoxical SUBREG. */
27483 tmp1 = gen_reg_rtx (SImode);
27484 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27485 /* Insert the SImode value as low element of V4SImode vector. */
27486 tmp2 = gen_reg_rtx (V4SImode);
27487 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27488 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27489 CONST0_RTX (V4SImode),
27491 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27492 /* Cast the V4SImode vector back to a V8HImode vector. */
27493 tmp1 = gen_reg_rtx (V8HImode);
27494 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27495 /* Duplicate the low short through the whole low SImode word. */
27496 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27497 /* Cast the V8HImode vector back to a V4SImode vector. */
27498 tmp2 = gen_reg_rtx (V4SImode);
27499 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27500 /* Replicate the low element of the V4SImode vector. */
27501 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27502 /* Cast the V2SImode back to V8HImode, and store in target. */
27503 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27514 /* Extend QImode to SImode using a paradoxical SUBREG. */
27515 tmp1 = gen_reg_rtx (SImode);
27516 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27517 /* Insert the SImode value as low element of V4SImode vector. */
27518 tmp2 = gen_reg_rtx (V4SImode);
27519 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27520 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27521 CONST0_RTX (V4SImode),
27523 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27524 /* Cast the V4SImode vector back to a V16QImode vector. */
27525 tmp1 = gen_reg_rtx (V16QImode);
27526 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27527 /* Duplicate the low byte through the whole low SImode word. */
27528 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27529 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27530 /* Cast the V16QImode vector back to a V4SImode vector. */
27531 tmp2 = gen_reg_rtx (V4SImode);
27532 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27533 /* Replicate the low element of the V4SImode vector. */
27534 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27535 /* Cast the V2SImode back to V16QImode, and store in target. */
27536 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27544 /* Replicate the value once into the next wider mode and recurse. */
27545 val = convert_modes (wsmode, smode, val, true);
27546 x = expand_simple_binop (wsmode, ASHIFT, val,
27547 GEN_INT (GET_MODE_BITSIZE (smode)),
27548 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27549 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27551 x = gen_reg_rtx (wvmode);
27552 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27553 gcc_unreachable ();
27554 emit_move_insn (target, gen_lowpart (mode, x));
/* Wide (e.g. 256-bit) modes: duplicate into the half-width mode and
   concatenate the two halves.  */
27577 rtx tmp = gen_reg_rtx (hmode);
27578 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27579 emit_insn (gen_rtx_SET (VOIDmode, target,
27580 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27589 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27590 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* NOTE(review): elided listing -- the mode switch, case labels and
   several returns/braces are missing lines.  */
27594 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27595 rtx target, rtx var, int one_var)
27597 enum machine_mode vsimode;
27600 bool use_vector_set = false;
27605 /* For SSE4.1, we normally use vector set. But if the second
27606 element is zero and inter-unit moves are OK, we use movq
27608 use_vector_set = (TARGET_64BIT
27610 && !(TARGET_INTER_UNIT_MOVES
27616 use_vector_set = TARGET_SSE4_1;
27619 use_vector_set = TARGET_SSE2;
27622 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27629 use_vector_set = TARGET_AVX;
27632 /* Use ix86_expand_vector_set in 64bit mode only. */
27633 use_vector_set = TARGET_AVX && TARGET_64BIT;
/* Preferred strategy: zero the target, then insert VAR at ONE_VAR.  */
27639 if (use_vector_set)
27641 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27642 var = force_reg (GET_MODE_INNER (mode), var);
27643 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* VEC_CONCAT of VAR with a zero scalar (two-element vectors).  */
27659 var = force_reg (GET_MODE_INNER (mode), var);
27660 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27661 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Four-element case: build {var,0,0,0} then shuffle VAR into place.  */
27666 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27667 new_target = gen_reg_rtx (mode);
27669 new_target = target;
27670 var = force_reg (GET_MODE_INNER (mode), var);
27671 x = gen_rtx_VEC_DUPLICATE (mode, var);
27672 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27673 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27676 /* We need to shuffle the value to the correct position, so
27677 create a new pseudo to store the intermediate result. */
27679 /* With SSE2, we can use the integer shuffle insns. */
27680 if (mode != V4SFmode && TARGET_SSE2)
27682 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27684 GEN_INT (one_var == 1 ? 0 : 1),
27685 GEN_INT (one_var == 2 ? 0 : 1),
27686 GEN_INT (one_var == 3 ? 0 : 1)));
27687 if (target != new_target)
27688 emit_move_insn (target, new_target);
27692 /* Otherwise convert the intermediate result to V4SFmode and
27693 use the SSE1 shuffle instructions. */
27694 if (mode != V4SFmode)
27696 tmp = gen_reg_rtx (V4SFmode);
27697 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27702 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27704 GEN_INT (one_var == 1 ? 0 : 1),
27705 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27706 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27708 if (mode != V4SFmode)
27709 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27710 else if (tmp != target)
27711 emit_move_insn (target, tmp);
27713 else if (target != new_target)
27714 emit_move_insn (target, new_target);
27719 vsimode = V4SImode;
27725 vsimode = V2SImode;
27731 /* Zero extend the variable element to SImode and recurse. */
27732 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27734 x = gen_reg_rtx (vsimode);
27735 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27737 gcc_unreachable ();
27739 emit_move_insn (target, gen_lowpart (mode, x));
27747 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27748 consisting of the values in VALS. It is known that all elements
27749 except ONE_VAR are constants. Return true if successful. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.  */
27752 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27753 rtx target, rtx vals, int one_var)
27755 rtx var = XVECEXP (vals, 0, one_var);
27756 enum machine_mode wmode;
/* CONST_VEC is VALS with the variable slot replaced by zero, so it can be
   loaded as a constant and then patched with VAR.  */
27759 const_vec = copy_rtx (vals);
27760 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27761 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27769 /* For the two element vectors, it's just as easy to use
27770 the general case. */
27774 /* Use ix86_expand_vector_set in 64bit mode only. */
27797 /* There's no way to set one QImode entry easily. Combine
27798 the variable value with its adjacent constant value, and
27799 promote to an HImode set. */
27800 x = XVECEXP (vals, 0, one_var ^ 1);
27803 var = convert_modes (HImode, QImode, var, true);
27804 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27805 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27806 x = GEN_INT (INTVAL (x) & 0xff);
27810 var = convert_modes (HImode, QImode, var, true);
27811 x = gen_int_mode (INTVAL (x) << 8, HImode);
27813 if (x != const0_rtx)
27814 var = expand_simple_binop (HImode, IOR, var, x, var,
27815 1, OPTAB_LIB_WIDEN);
27817 x = gen_reg_rtx (wmode);
27818 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27819 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27821 emit_move_insn (target, gen_lowpart (mode, x));
/* Default case: load the constant vector, then overwrite slot ONE_VAR.  */
27828 emit_move_insn (target, const_vec);
27829 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27833 /* A subroutine of ix86_expand_vector_init_general. Use vector
27834 concatenate to handle the most general case: all values variable,
27835 and none identical. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.
   Recursively builds TARGET by pairwise VEC_CONCAT of the N inputs.  */
27838 ix86_expand_vector_init_concat (enum machine_mode mode,
27839 rtx target, rtx *ops, int n)
27841 enum machine_mode cmode, hmode = VOIDmode;
27842 rtx first[8], second[4];
27882 gcc_unreachable ();
/* n == 2 base case: force both halves into registers and concat.  */
27885 if (!register_operand (ops[1], cmode))
27886 ops[1] = force_reg (cmode, ops[1]);
27887 if (!register_operand (ops[0], cmode))
27888 ops[0] = force_reg (cmode, ops[0]);
27889 emit_insn (gen_rtx_SET (VOIDmode, target,
27890 gen_rtx_VEC_CONCAT (mode, ops[0],
27910 gcc_unreachable ();
27926 gcc_unreachable ();
27931 /* FIXME: We process inputs backward to help RA. PR 36222. */
27934 for (; i > 0; i -= 2, j--)
27936 first[j] = gen_reg_rtx (cmode);
27937 v = gen_rtvec (2, ops[i - 1], ops[i]);
27938 ix86_expand_vector_init (false, first[j],
27939 gen_rtx_PARALLEL (cmode, v));
/* If a second reduction level is needed, combine pairs of FIRST into
   SECOND in half mode HMODE, then recurse; otherwise recurse on FIRST.  */
27945 gcc_assert (hmode != VOIDmode);
27946 for (i = j = 0; i < n; i += 2, j++)
27948 second[j] = gen_reg_rtx (hmode);
27949 ix86_expand_vector_init_concat (hmode, second [j],
27953 ix86_expand_vector_init_concat (mode, target, second, n);
27956 ix86_expand_vector_init_concat (mode, target, first, n);
27960 gcc_unreachable ();
27964 /* A subroutine of ix86_expand_vector_init_general. Use vector
27965 interleave to handle the most general case: all values variable,
27966 and none identical. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.
   Loads element pairs and merges them with successive unpack-low steps.  */
27969 ix86_expand_vector_init_interleave (enum machine_mode mode,
27970 rtx target, rtx *ops, int n)
27972 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27975 rtx (*gen_load_even) (rtx, rtx, rtx);
27976 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27977 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* Mode-specific insn generators: V8HImode path (elided switch case).  */
27982 gen_load_even = gen_vec_setv8hi;
27983 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27984 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27985 inner_mode = HImode;
27986 first_imode = V4SImode;
27987 second_imode = V2DImode;
27988 third_imode = VOIDmode;
/* V16QImode path: needs one extra interleave level (THIRD_IMODE).  */
27991 gen_load_even = gen_vec_setv16qi;
27992 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27993 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27994 inner_mode = QImode;
27995 first_imode = V8HImode;
27996 second_imode = V4SImode;
27997 third_imode = V2DImode;
28000 gcc_unreachable ();
28003 for (i = 0; i < n; i++)
28005 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
28006 op0 = gen_reg_rtx (SImode);
28007 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28009 /* Insert the SImode value as low element of V4SImode vector. */
28010 op1 = gen_reg_rtx (V4SImode);
28011 op0 = gen_rtx_VEC_MERGE (V4SImode,
28012 gen_rtx_VEC_DUPLICATE (V4SImode,
28014 CONST0_RTX (V4SImode),
28016 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28018 /* Cast the V4SImode vector back to a vector in orignal mode. */
28019 op0 = gen_reg_rtx (mode);
28020 emit_move_insn (op0, gen_lowpart (mode, op1));
28022 /* Load even elements into the second positon. */
28023 emit_insn ((*gen_load_even) (op0,
28024 force_reg (inner_mode,
28028 /* Cast vector to FIRST_IMODE vector. */
28029 ops[i] = gen_reg_rtx (first_imode);
28030 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28033 /* Interleave low FIRST_IMODE vectors. */
28034 for (i = j = 0; i < n; i += 2, j++)
28036 op0 = gen_reg_rtx (first_imode);
28037 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28039 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28040 ops[j] = gen_reg_rtx (second_imode);
28041 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28044 /* Interleave low SECOND_IMODE vectors. */
28045 switch (second_imode)
28048 for (i = j = 0; i < n / 2; i += 2, j++)
28050 op0 = gen_reg_rtx (second_imode);
28051 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28054 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28056 ops[j] = gen_reg_rtx (third_imode);
28057 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* After the V4SImode pass, continue interleaving at V2DImode.  */
28059 second_imode = V2DImode;
28060 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28064 op0 = gen_reg_rtx (second_imode);
28065 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28068 /* Cast the SECOND_IMODE vector back to a vector on original
28070 emit_insn (gen_rtx_SET (VOIDmode, target,
28071 gen_lowpart (mode, op0)));
28075 gcc_unreachable ();
28079 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28080 all values variable, and none identical. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.
   Dispatches on MODE to the concat, interleave, or word-building paths.  */
28083 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28084 rtx target, rtx vals)
28086 rtx ops[32], op0, op1;
28087 enum machine_mode half_mode = VOIDmode;
28094 if (!mmx_ok && !TARGET_SSE)
/* Wide-element modes: build via recursive VEC_CONCAT.  */
28106 n = GET_MODE_NUNITS (mode);
28107 for (i = 0; i < n; i++)
28108 ops[i] = XVECEXP (vals, 0, i);
28109 ix86_expand_vector_init_concat (mode, target, ops, n);
/* 256-bit narrow-element modes: interleave each 128-bit half, concat.  */
28113 half_mode = V16QImode;
28117 half_mode = V8HImode;
28121 n = GET_MODE_NUNITS (mode);
28122 for (i = 0; i < n; i++)
28123 ops[i] = XVECEXP (vals, 0, i);
28124 op0 = gen_reg_rtx (half_mode);
28125 op1 = gen_reg_rtx (half_mode);
28126 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28128 ix86_expand_vector_init_interleave (half_mode, op1,
28129 &ops [n >> 1], n >> 2);
28130 emit_insn (gen_rtx_SET (VOIDmode, target,
28131 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28135 if (!TARGET_SSE4_1)
28143 /* Don't use ix86_expand_vector_init_interleave if we can't
28144 move from GPR to SSE register directly. */
28145 if (!TARGET_INTER_UNIT_MOVES)
28148 n = GET_MODE_NUNITS (mode);
28149 for (i = 0; i < n; i++)
28150 ops[i] = XVECEXP (vals, 0, i);
28151 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28159 gcc_unreachable ();
/* Fallback: pack elements into word_mode scalars with shift/or, then
   assemble the vector from those words.  */
28163 int i, j, n_elts, n_words, n_elt_per_word;
28164 enum machine_mode inner_mode;
28165 rtx words[4], shift;
28167 inner_mode = GET_MODE_INNER (mode);
28168 n_elts = GET_MODE_NUNITS (mode);
28169 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28170 n_elt_per_word = n_elts / n_words;
28171 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28173 for (i = 0; i < n_words; ++i)
28175 rtx word = NULL_RTX;
28177 for (j = 0; j < n_elt_per_word; ++j)
28179 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28180 elt = convert_modes (word_mode, inner_mode, elt, true);
28186 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28187 word, 1, OPTAB_LIB_WIDEN);
28188 word = expand_simple_binop (word_mode, IOR, word, elt,
28189 word, 1, OPTAB_LIB_WIDEN);
28197 emit_move_insn (target, gen_lowpart (mode, words[0]));
28198 else if (n_words == 2)
28200 rtx tmp = gen_reg_rtx (mode);
/* Clobber TMP first so the part-writes below don't read an
   uninitialized pseudo.  */
28201 emit_clobber (tmp);
28202 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28203 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28204 emit_move_insn (target, tmp);
28206 else if (n_words == 4)
28208 rtx tmp = gen_reg_rtx (V4SImode);
28209 gcc_assert (word_mode == SImode);
28210 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28211 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28212 emit_move_insn (target, gen_lowpart (mode, tmp));
28215 gcc_unreachable ();
28219 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28220 instructions unless MMX_OK is true. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.
   Entry point: classifies VALS, then tries the cheapest strategy first.  */
28223 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28225 enum machine_mode mode = GET_MODE (target);
28226 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28227 int n_elts = GET_MODE_NUNITS (mode);
28228 int n_var = 0, one_var = -1;
28229 bool all_same = true, all_const_zero = true;
/* Scan VALS: count variable elements, remember the last one, and note
   whether all elements are equal / all constant zero.  */
28233 for (i = 0; i < n_elts; ++i)
28235 x = XVECEXP (vals, 0, i);
28236 if (!(CONST_INT_P (x)
28237 || GET_CODE (x) == CONST_DOUBLE
28238 || GET_CODE (x) == CONST_FIXED))
28239 n_var++, one_var = i;
28240 else if (x != CONST0_RTX (inner_mode))
28241 all_const_zero = false;
28242 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28246 /* Constants are best loaded from the constant pool. */
28249 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28253 /* If all values are identical, broadcast the value. */
28255 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28256 XVECEXP (vals, 0, 0)))
28259 /* Values where only one field is non-constant are best loaded from
28260 the pool and overwritten via move later. */
28264 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28265 XVECEXP (vals, 0, one_var),
28269 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Everything else falls through to the general expander.  */
28273 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET, using MMX insns
   only when MMX_OK.  NOTE(review): elided extract — code kept
   byte-identical, comments only added.  */
28277 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28279 enum machine_mode mode = GET_MODE (target);
28280 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28281 enum machine_mode half_mode;
28282 bool use_vec_merge = false;
/* AVX 256-bit modes: tables of lo/hi half extract and insert insns,
   indexed by [mode-row][half].  */
28284 static rtx (*gen_extract[6][2]) (rtx, rtx)
28286 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28287 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28288 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28289 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28290 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28291 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28293 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28295 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28296 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28297 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28298 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28299 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28300 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
/* Two-element modes: extract the other element and re-concat with VAL
   in the right order.  */
28310 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28311 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28313 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28315 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28316 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28322 use_vec_merge = TARGET_SSE4_1;
28330 /* For the two element vectors, we implement a VEC_CONCAT with
28331 the extraction of the other element. */
28333 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28334 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28337 op0 = val, op1 = tmp;
28339 op0 = tmp, op1 = val;
28341 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28342 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28347 use_vec_merge = TARGET_SSE4_1;
28354 use_vec_merge = true;
/* V4SFmode shufps sequences; the A/B/C/D comments trace element layout.  */
28358 /* tmp = target = A B C D */
28359 tmp = copy_to_reg (target);
28360 /* target = A A B B */
28361 emit_insn (gen_sse_unpcklps (target, target, target));
28362 /* target = X A B B */
28363 ix86_expand_vector_set (false, target, val, 0);
28364 /* target = A X C D */
28365 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28366 GEN_INT (1), GEN_INT (0),
28367 GEN_INT (2+4), GEN_INT (3+4)));
28371 /* tmp = target = A B C D */
28372 tmp = copy_to_reg (target);
28373 /* tmp = X B C D */
28374 ix86_expand_vector_set (false, tmp, val, 0);
28375 /* target = A B X D */
28376 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28377 GEN_INT (0), GEN_INT (1),
28378 GEN_INT (0+4), GEN_INT (3+4)));
28382 /* tmp = target = A B C D */
28383 tmp = copy_to_reg (target);
28384 /* tmp = X B C D */
28385 ix86_expand_vector_set (false, tmp, val, 0);
28386 /* target = A B X D */
28387 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28388 GEN_INT (0), GEN_INT (1),
28389 GEN_INT (2+4), GEN_INT (0+4)));
28393 gcc_unreachable ();
28398 use_vec_merge = TARGET_SSE4_1;
28402 /* Element 0 handled by vec_merge below. */
28405 use_vec_merge = true;
28411 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28412 store into element 0, then shuffle them back. */
28416 order[0] = GEN_INT (elt);
28417 order[1] = const1_rtx;
28418 order[2] = const2_rtx;
28419 order[3] = GEN_INT (3);
28420 order[elt] = const0_rtx;
28422 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28423 order[1], order[2], order[3]));
28425 ix86_expand_vector_set (false, target, val, 0);
28427 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28428 order[1], order[2], order[3]));
28432 /* For SSE1, we have to reuse the V4SF code. */
28433 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28434 gen_lowpart (SFmode, val), elt);
28439 use_vec_merge = TARGET_SSE2;
28442 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28446 use_vec_merge = TARGET_SSE4_1;
/* AVX 256-bit modes: operate on the 128-bit half containing ELT.  */
28453 half_mode = V16QImode;
28459 half_mode = V8HImode;
28465 half_mode = V4SImode;
28471 half_mode = V2DImode;
28477 half_mode = V4SFmode;
28483 half_mode = V2DFmode;
28489 /* Compute offset. */
28493 gcc_assert (i <= 1);
28495 /* Extract the half. */
28496 tmp = gen_reg_rtx (half_mode);
28497 emit_insn ((*gen_extract[j][i]) (tmp, target));
28499 /* Put val in tmp at elt. */
28500 ix86_expand_vector_set (false, tmp, val, elt);
28503 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
/* Generic VEC_MERGE insertion when an insn form supports it.  */
28512 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28513 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28514 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the element, reload.  */
28518 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28520 emit_move_insn (mem, target);
28522 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28523 emit_move_insn (tmp, val);
28525 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET; MMX insns are
   allowed only when MMX_OK.  NOTE(review): elided extract — code kept
   byte-identical, comments only added.  */
28530 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28532 enum machine_mode mode = GET_MODE (vec);
28533 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28534 bool use_vec_extr = false;
28547 use_vec_extr = true;
28551 use_vec_extr = TARGET_SSE4_1;
/* V4SFmode: shuffle the wanted element into lane 0 first.  */
28563 tmp = gen_reg_rtx (mode);
28564 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28565 GEN_INT (elt), GEN_INT (elt),
28566 GEN_INT (elt+4), GEN_INT (elt+4)));
28570 tmp = gen_reg_rtx (mode);
28571 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28575 gcc_unreachable ();
28578 use_vec_extr = true;
28583 use_vec_extr = TARGET_SSE4_1;
/* V4SImode with SSE2: pshufd the element into lane 0.  */
28597 tmp = gen_reg_rtx (mode);
28598 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28599 GEN_INT (elt), GEN_INT (elt),
28600 GEN_INT (elt), GEN_INT (elt)));
28604 tmp = gen_reg_rtx (mode);
28605 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28609 gcc_unreachable ();
28612 use_vec_extr = true;
28617 /* For SSE1, we have to reuse the V4SF code. */
28618 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28619 gen_lowpart (V4SFmode, vec), elt);
28625 use_vec_extr = TARGET_SSE2;
28628 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28632 use_vec_extr = TARGET_SSE4_1;
28636 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic VEC_SELECT extraction path.  */
28643 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28644 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28646 /* Let the rtl optimizers know about the zero extension performed. */
28647 if (inner_mode == QImode || inner_mode == HImode)
28649 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28650 target = gen_lowpart (SImode, target);
28653 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill VEC to the stack and load the element from memory.  */
28657 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28659 emit_move_insn (mem, vec);
28661 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28662 emit_move_insn (target, tmp);
28666 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28667 pattern to reduce; DEST is the destination; IN is the input vector. */
/* NOTE(review): elided extract — code kept byte-identical, comments only.
   Classic log2 reduction: fold the high half onto the low half, then
   fold the remaining pair.  */
28670 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28672 rtx tmp1, tmp2, tmp3;
28674 tmp1 = gen_reg_rtx (V4SFmode);
28675 tmp2 = gen_reg_rtx (V4SFmode);
28676 tmp3 = gen_reg_rtx (V4SFmode);
28678 emit_insn (gen_sse_movhlps (tmp1, in, in));
28679 emit_insn (fn (tmp2, tmp1, in));
28681 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28682 GEN_INT (1), GEN_INT (1),
28683 GEN_INT (1+4), GEN_INT (1+4)));
28684 emit_insn (fn (dest, tmp2, tmp3));
28687 /* Target hook for scalar_mode_supported_p. */
/* NOTE(review): elided extract — return values for the DFP/TFmode arms
   are not visible here; code kept byte-identical.  */
28689 ix86_scalar_mode_supported_p (enum machine_mode mode)
28691 if (DECIMAL_FLOAT_MODE_P (mode))
28693 else if (mode == TFmode)
28696 return default_scalar_mode_supported_p (mode);
28699 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported if any enabled ISA level validates it.  */
28701 ix86_vector_mode_supported_p (enum machine_mode mode)
28703 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28705 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28707 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28709 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28711 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28716 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): body elided in this extract; maps a constant-suffix
   character to a machine mode per the hook contract.  */
28717 static enum machine_mode
28718 ix86_c_mode_for_suffix (char suffix)
28728 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28730 We do this in the new i386 backend to maintain source compatibility
28731 with the old cc0-based compiler. */
/* Implicitly clobber "flags" and "fpsr" for every asm statement.  */
28734 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28735 tree inputs ATTRIBUTE_UNUSED,
28738 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28740 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28745 /* Implements target vector targetm.asm.encode_section_info. This
28746 is not used by netware. */
/* After default encoding, mark large-model static data symbols as
   requiring far addressing.  */
28748 static void ATTRIBUTE_UNUSED
28749 ix86_encode_section_info (tree decl, rtx rtl, int first)
28751 default_encode_section_info (decl, rtl, first);
28753 if (TREE_CODE (decl) == VAR_DECL
28754 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28755 && ix86_in_large_data_p (decl))
28756 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28759 /* Worker function for REVERSE_CONDITION. */
/* FP compare modes must use the maybe-unordered reversal to keep NaN
   semantics correct.  */
28762 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28764 return (mode != CCFPmode && mode != CCFPUmode
28765 ? reverse_condition (code)
28766 : reverse_condition_maybe_unordered (code));
28769 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template; chooses popping (fstp) forms when the
   source register dies in this insn.  NOTE(review): elided extract —
   code kept byte-identical.  */
28773 output_387_reg_move (rtx insn, rtx *operands)
28775 if (REG_P (operands[0]))
28777 if (REG_P (operands[1])
28778 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28780 if (REGNO (operands[0]) == FIRST_STACK_REG)
28781 return output_387_ffreep (operands, 0);
28782 return "fstp\t%y0";
28784 if (STACK_TOP_P (operands[0]))
28785 return "fld%z1\t%y1";
28788 else if (MEM_P (operands[0]))
28790 gcc_assert (REG_P (operands[1]));
28791 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28792 return "fstp%z0\t%y0";
28795 /* There is no non-popping store to memory for XFmode.
28796 So if we need one, follow the store with a load. */
28797 if (GET_MODE (operands[0]) == XFmode)
28798 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28800 return "fst%z0\t%y0";
28807 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28808 FP status register is set. */
/* Reads FP status via fnstsw; uses sahf when available and profitable,
   otherwise tests bit 0x04 of the status byte directly.  */
28811 ix86_emit_fp_unordered_jump (rtx label)
28813 rtx reg = gen_reg_rtx (HImode);
28816 emit_insn (gen_x86_fnstsw_1 (reg));
28818 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28820 emit_insn (gen_x86_sahf_1 (reg));
28822 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28823 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28827 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28829 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28830 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28833 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28834 gen_rtx_LABEL_REF (VOIDmode, label),
28836 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28838 emit_jump_insn (temp);
/* Mark the branch as unlikely (10%).  */
28839 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28842 /* Output code to perform a log1p XFmode calculation. */
/* Uses fyl2xp1 for |op1| below ~(sqrt(2)/2 - 1/2), which is its accurate
   range, and falls back to fyl2x on 1+op1 otherwise.  */
28844 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28846 rtx label1 = gen_label_rtx ();
28847 rtx label2 = gen_label_rtx ();
28849 rtx tmp = gen_reg_rtx (XFmode);
28850 rtx tmp2 = gen_reg_rtx (XFmode);
28852 emit_insn (gen_absxf2 (tmp, op1));
28853 emit_insn (gen_cmpxf (tmp,
28854 CONST_DOUBLE_FROM_REAL_VALUE (
28855 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28857 emit_jump_insn (gen_bge (label1));
28859 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28860 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28861 emit_jump (label2);
28863 emit_label (label1);
28864 emit_move_insn (tmp, CONST1_RTX (XFmode));
28865 emit_insn (gen_addxf3 (tmp, op1, tmp));
28866 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28867 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28869 emit_label (label2);
28872 /* Output code to perform a Newton-Rhapson approximation of a single precision
28873 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* One Newton-Raphson refinement step on the rcpss estimate; MODE may be
   scalar SFmode or a vector-of-SF mode.  */
28875 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28877 rtx x0, x1, e0, e1, two;
28879 x0 = gen_reg_rtx (mode);
28880 e0 = gen_reg_rtx (mode);
28881 e1 = gen_reg_rtx (mode);
28882 x1 = gen_reg_rtx (mode);
28884 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28886 if (VECTOR_MODE_P (mode))
28887 two = ix86_build_const_vector (SFmode, true, two);
28889 two = force_reg (mode, two);
28891 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28893 /* x0 = rcp(b) estimate */
28894 emit_insn (gen_rtx_SET (VOIDmode, x0,
28895 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
28898 emit_insn (gen_rtx_SET (VOIDmode, e0,
28899 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
28901 emit_insn (gen_rtx_SET (VOIDmode, e1,
28902 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal) */
28904 emit_insn (gen_rtx_SET (VOIDmode, x1,
28905 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
28907 emit_insn (gen_rtx_SET (VOIDmode, res,
28908 gen_rtx_MULT (mode, a, x1)));
28911 /* Output code to perform a Newton-Rhapson approximation of a
28912 single precision floating point [reciprocal] square root. */
/* One NR step on the rsqrtss estimate; computes either sqrt(a) or
   rsqrt(a) depending on the (elided) recip flag.  */
28914 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28917 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28920 x0 = gen_reg_rtx (mode);
28921 e0 = gen_reg_rtx (mode);
28922 e1 = gen_reg_rtx (mode);
28923 e2 = gen_reg_rtx (mode);
28924 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
28926 real_from_integer (&r, VOIDmode, -3, -1, 0);
28927 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28929 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28930 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28932 if (VECTOR_MODE_P (mode))
28934 mthree = ix86_build_const_vector (SFmode, true, mthree);
28935 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28938 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28939 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28941 /* x0 = rsqrt(a) estimate */
28942 emit_insn (gen_rtx_SET (VOIDmode, x0,
28943 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28946 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28951 zero = gen_reg_rtx (mode);
28952 mask = gen_reg_rtx (mode);
28954 zero = force_reg (mode, CONST0_RTX(mode));
/* mask = (a != 0), then x0 &= mask so x0 is 0 where a is 0.  */
28955 emit_insn (gen_rtx_SET (VOIDmode, mask,
28956 gen_rtx_NE (mode, zero, a)));
28958 emit_insn (gen_rtx_SET (VOIDmode, x0,
28959 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a;  e1 = e0 * x0;  e2 = e1 - 3.0  */
28963 emit_insn (gen_rtx_SET (VOIDmode, e0,
28964 gen_rtx_MULT (mode, x0, a)));
28966 emit_insn (gen_rtx_SET (VOIDmode, e1,
28967 gen_rtx_MULT (mode, e0, x0)));
28970 mthree = force_reg (mode, mthree);
28971 emit_insn (gen_rtx_SET (VOIDmode, e2,
28972 gen_rtx_PLUS (mode, e1, mthree)));
28974 mhalf = force_reg (mode, mhalf);
28976 /* e3 = -.5 * x0 */
28977 emit_insn (gen_rtx_SET (VOIDmode, e3,
28978 gen_rtx_MULT (mode, x0, mhalf)));
28980 /* e3 = -.5 * e0 */
28981 emit_insn (gen_rtx_SET (VOIDmode, e3,
28982 gen_rtx_MULT (mode, e0, mhalf)));
28983 /* ret = e2 * e3 */
28984 emit_insn (gen_rtx_SET (VOIDmode, res,
28985 gen_rtx_MULT (mode, e2, e3)));
28988 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Adds the "@unwind" section type for .eh_frame; otherwise defers to
   the default ELF implementation.  */
28990 static void ATTRIBUTE_UNUSED
28991 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28994 /* With Binutils 2.15, the "@unwind" marker must be specified on
28995 every occurrence of the ".eh_frame" section, not just the first
28998 && strcmp (name, ".eh_frame") == 0)
29000 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29001 flags & SECTION_WRITE ? "aw" : "a");
29004 default_elf_asm_named_section (name, flags, decl);
29007 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* NOTE(review): elided extract — the NULL/default returns are not
   visible; code kept byte-identical.  */
29009 static const char *
29010 ix86_mangle_type (const_tree type)
29012 type = TYPE_MAIN_VARIANT (type);
29014 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29015 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29018 switch (TYPE_MODE (type))
29021 /* __float128 is "g". */
29024 /* "long double" or __float80 is "e". */
29031 /* For 32-bit code we can save PIC register setup by using
29032 __stack_chk_fail_local hidden function instead of calling
29033 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29034 register, so it is better to call __stack_chk_fail directly. */
29037 ix86_stack_protect_fail (void)
29039 return TARGET_64BIT
29040 ? default_external_stack_protect_fail ()
29041 : default_hidden_stack_protect_fail ();
29044 /* Select a format to encode pointers in exception handling data. CODE
29045 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29046 true if the symbol may be affected by dynamic relocations.
29048 ??? All x86 object file formats are capable of representing this.
29049 After all, the relocation needed is the same as for the call insn.
29050 Whether or not a particular assembler allows us to enter such, I
29051 guess we'll have to see. */
/* PIC branch (elided condition above): pick sdata4/sdata8 pc-relative,
   indirect for global symbols; non-PIC small/medium models use udata4.  */
29053 asm_preferred_eh_data_format (int code, int global)
29057 int type = DW_EH_PE_sdata8;
29059 || ix86_cmodel == CM_SMALL_PIC
29060 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29061 type = DW_EH_PE_sdata4;
29062 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29064 if (ix86_cmodel == CM_SMALL
29065 || (ix86_cmodel == CM_MEDIUM && code))
29066 return DW_EH_PE_udata4;
29067 return DW_EH_PE_absptr;
29070 /* Expand copysign from SIGN to the positive value ABS_VALUE
29071 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* result = abs_value | (sign & ~signbit-mask complement); a scalar-mode
   mask is derived from the vector mask via VEC_SELECT when needed.  */
29074 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29076 enum machine_mode mode = GET_MODE (sign);
29077 rtx sgn = gen_reg_rtx (mode);
29078 if (mask == NULL_RTX)
29080 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29081 if (!VECTOR_MODE_P (mode))
29083 /* We need to generate a scalar mode mask in this case. */
29084 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29085 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29086 mask = gen_reg_rtx (mode);
29087 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK marks bits to drop, so invert it first.  */
29091 mask = gen_rtx_NOT (mode, mask);
29092 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29093 gen_rtx_AND (mode, mask, sign)));
29094 emit_insn (gen_rtx_SET (VOIDmode, result,
29095 gen_rtx_IOR (mode, abs_value, sgn)));
29098 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29099 mask for masking out the sign-bit is stored in *SMASK, if that is
/* Implemented as op0 & ~signbit using a signbit mask constant.  */
29102 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29104 enum machine_mode mode = GET_MODE (op0);
29107 xa = gen_reg_rtx (mode);
29108 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29109 if (!VECTOR_MODE_P (mode))
29111 /* We need to generate a scalar mode mask in this case. */
29112 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29113 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29114 mask = gen_reg_rtx (mode);
29115 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29117 emit_insn (gen_rtx_SET (VOIDmode, xa,
29118 gen_rtx_AND (mode, op0, mask)));
29126 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29127 swapping the operands if SWAP_OPERANDS is true. The expanded
29128 code is a forward jump to a newly created label in case the
29129 comparison is true. The generated label rtx is returned. */
29131 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29132 bool swap_operands)
/* Uses CCFPUmode (unordered FP compare flags) for the branch.  */
29143 label = gen_label_rtx ();
29144 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29145 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29146 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29147 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29148 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29149 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29150 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29151 JUMP_LABEL (tmp) = label;
29156 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29157 using comparison code CODE. Operands are swapped for the comparison if
29158 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29160 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29161 bool swap_operands)
29163 enum machine_mode mode = GET_MODE (op0);
29164 rtx mask = gen_reg_rtx (mode);
/* cmpsd for DFmode, cmpss for SFmode.  */
29173 if (mode == DFmode)
29174 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29175 gen_rtx_fmt_ee (code, mode, op0, op1)));
29177 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29178 gen_rtx_fmt_ee (code, mode, op0, op1)));
29183 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29184 of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
29186 ix86_gen_TWO52 (enum machine_mode mode)
29188 REAL_VALUE_TYPE TWO52r;
/* 2**52 for double, 2**23 for float: adding then subtracting this
   constant rounds away the fractional bits of any |x| below it.  */
29191 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29192 TWO52 = const_double_from_real_value (TWO52r, mode);
29193 TWO52 = force_reg (mode, TWO52);
29198 /* Expand SSE sequence for computing lround from OP1 storing
29201 ix86_expand_lround (rtx op0, rtx op1)
29203 /* C code for the stuff we're doing below:
29204 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29207 enum machine_mode mode = GET_MODE (op1);
29208 const struct real_format *fmt;
29209 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
/* Use the largest representable value strictly below 0.5 so that
   values exactly halfway are not double-rounded upward.  */
29212 /* load nextafter (0.5, 0.0) */
29213 fmt = REAL_MODE_FORMAT (mode);
29214 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29215 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29217 /* adj = copysign (0.5, op1) */
29218 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29219 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29221 /* adj = op1 + adj */
29222 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
/* Truncating fix of the biased value implements round-half-away.  */
29224 /* op0 = (imode)adj */
29225 expand_fix (op0, adj, 0);
29228 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
29231 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29233 /* C code for the stuff we're doing below (for do_floor):
29235 xi -= (double)xi > op1 ? 1 : 0;
29238 enum machine_mode fmode = GET_MODE (op1);
29239 enum machine_mode imode = GET_MODE (op0);
29240 rtx ireg, freg, label, tmp;
/* cvttsd2si truncates toward zero; the compare-and-adjust below
   converts that into floor/ceil semantics.  */
29242 /* reg = (long)op1 */
29243 ireg = gen_reg_rtx (imode);
29244 expand_fix (ireg, op1, 0);
29246 /* freg = (double)reg */
29247 freg = gen_reg_rtx (fmode);
29248 expand_float (freg, ireg, 0);
/* For floor: skip the decrement when (double)ireg <= op1 (UNLE also
   skips on NaN).  For ceil the operands are swapped (!do_floor) and
   the adjustment is an increment.  */
29250 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29251 label = ix86_expand_sse_compare_and_jump (UNLE,
29252 freg, op1, !do_floor);
29253 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29254 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29255 emit_move_insn (ireg, tmp);
29257 emit_label (label);
29258 LABEL_NUSES (label) = 1;
29260 emit_move_insn (op0, ireg);
29263 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29264 result in OPERAND0.  */
29266 ix86_expand_rint (rtx operand0, rtx operand1)
29268 /* C code for the stuff we're doing below:
29269 xa = fabs (operand1);
29270 if (!isless (xa, 2**52))
29272 xa = xa + 2**52 - 2**52;
29273 return copysign (xa, operand1);
29275 enum machine_mode mode = GET_MODE (operand0);
29276 rtx res, xa, label, TWO52, mask;
/* RES starts as a copy of the input so the label fall-through path
   (|x| >= 2**52, already integral, or NaN) needs no extra move.  */
29278 res = gen_reg_rtx (mode);
29279 emit_move_insn (res, operand1);
29281 /* xa = abs (operand1) */
29282 xa = ix86_expand_sse_fabs (res, &mask);
29284 /* if (!isless (xa, TWO52)) goto label; */
29285 TWO52 = ix86_gen_TWO52 (mode);
29286 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Adding and subtracting 2**52 rounds XA to an integer in the current
   (round-to-nearest) rounding mode.  */
29288 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29289 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Re-attach the original sign, preserving -0.0.  */
29291 ix86_sse_copysign_to_positive (res, xa, res, mask);
29293 emit_label (label);
29294 LABEL_NUSES (label) = 1;
29296 emit_move_insn (operand0, res);
29299 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29302 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29304 /* C code for the stuff we expand below.
29305 double xa = fabs (x), x2;
29306 if (!isless (xa, TWO52))
29308 xa = xa + TWO52 - TWO52;
29309 x2 = copysign (xa, x);
/* 32-bit variant: avoids DImode cvttsd2siq, which only exists on
   64-bit targets, by using the TWO52 add/subtract trick instead.  */
29318 enum machine_mode mode = GET_MODE (operand0);
29319 rtx xa, TWO52, tmp, label, one, res, mask;
29321 TWO52 = ix86_gen_TWO52 (mode);
29323 /* Temporary for holding the result, initialized to the input
29324 operand to ease control flow.  */
29325 res = gen_reg_rtx (mode);
29326 emit_move_insn (res, operand1);
29328 /* xa = abs (operand1) */
29329 xa = ix86_expand_sse_fabs (res, &mask);
29331 /* if (!isless (xa, TWO52)) goto label; */
29332 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29334 /* xa = xa + TWO52 - TWO52; */
29335 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29336 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29338 /* xa = copysign (xa, operand1) */
29339 ix86_sse_copysign_to_positive (xa, xa, res, mask);
/* floor corrects a round-up with -1; ceil corrects a round-down, and
   since ONE is -1.0 there, subtracting it adds 1.  */
29341 /* generate 1.0 or -1.0 */
29342 one = force_reg (mode,
29343 const_double_from_real_value (do_floor
29344 ? dconst1 : dconstm1, mode));
29346 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29347 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
/* AND the all-ones/all-zeros mask with ONE to get +-1.0 or 0.0.  */
29348 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29349 gen_rtx_AND (mode, one, tmp)));
29350 /* We always need to subtract here to preserve signed zero.  */
29351 tmp = expand_simple_binop (mode, MINUS,
29352 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29353 emit_move_insn (res, tmp);
29355 emit_label (label);
29356 LABEL_NUSES (label) = 1;
29358 emit_move_insn (operand0, res);
29361 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29364 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29366 /* C code for the stuff we expand below.
29367 double xa = fabs (x), x2;
29368 if (!isless (xa, TWO52))
29370 x2 = (double)(long)x;
29377 if (HONOR_SIGNED_ZEROS (mode))
29378 return copysign (x2, x);
/* Variant using fix/float through an integer register; requires the
   DImode conversion for DFmode, hence 64-bit targets for double.  */
29381 enum machine_mode mode = GET_MODE (operand0);
29382 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29384 TWO52 = ix86_gen_TWO52 (mode);
29386 /* Temporary for holding the result, initialized to the input
29387 operand to ease control flow.  */
29388 res = gen_reg_rtx (mode);
29389 emit_move_insn (res, operand1);
29391 /* xa = abs (operand1) */
29392 xa = ix86_expand_sse_fabs (res, &mask);
29394 /* if (!isless (xa, TWO52)) goto label; */
29395 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Truncate toward zero, then convert back to FP.  */
29397 /* xa = (double)(long)x */
29398 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29399 expand_fix (xi, res, 0);
29400 expand_float (xa, xi, 0);
29403 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* Truncation rounds toward zero, so floor must subtract 1 when the
   truncated value exceeds the input, ceil must add 1 when below it
   (compare operands swapped via !do_floor).  */
29405 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29406 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29407 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29408 gen_rtx_AND (mode, one, tmp)));
29409 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29410 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29411 emit_move_insn (res, tmp);
/* floor (-0.0) / ceil (-0.0) must stay -0.0 when signed zeros matter.  */
29413 if (HONOR_SIGNED_ZEROS (mode))
29414 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29416 emit_label (label);
29417 LABEL_NUSES (label) = 1;
29419 emit_move_insn (operand0, res);
29422 /* Expand SSE sequence for computing round from OPERAND1 storing
29423 into OPERAND0.  Sequence that works without relying on DImode truncation
29424 via cvttsd2siq that is only available on 64bit targets.  */
29426 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29428 /* C code for the stuff we expand below.
29429 double xa = fabs (x), xa2, x2;
29430 if (!isless (xa, TWO52))
29432 Using the absolute value and copying back sign makes
29433 -0.0 -> -0.0 correct.
29434 xa2 = xa + TWO52 - TWO52;
29439 else if (dxa > 0.5)
29441 x2 = copysign (xa2, x);
29444 enum machine_mode mode = GET_MODE (operand0);
29445 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29447 TWO52 = ix86_gen_TWO52 (mode);
29449 /* Temporary for holding the result, initialized to the input
29450 operand to ease control flow.  */
29451 res = gen_reg_rtx (mode);
29452 emit_move_insn (res, operand1);
29454 /* xa = abs (operand1) */
29455 xa = ix86_expand_sse_fabs (res, &mask);
29457 /* if (!isless (xa, TWO52)) goto label; */
29458 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Round to nearest (current rounding mode) via the TWO52 trick...  */
29460 /* xa2 = xa + TWO52 - TWO52; */
29461 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29462 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
/* ...then repair round-to-even ties so halfway cases round away from
   zero, as C round() requires.  DXA is the rounding error.  */
29464 /* dxa = xa2 - xa; */
29465 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29467 /* generate 0.5, 1.0 and -0.5 */
29468 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29469 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29470 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
/* NOTE(review): this gen_reg_rtx result is dead -- TMP is immediately
   overwritten by ix86_expand_sse_compare_mask two lines down.  Harmless
   (just a wasted pseudo) but could be removed.  */
29474 tmp = gen_reg_rtx (mode);
29475 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29476 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29477 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29478 gen_rtx_AND (mode, one, tmp)));
29479 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
/* UNGE (mhalf, dxa) computes -0.5 >= dxa, i.e. dxa <= -0.5.  */
29480 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29481 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29482 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29483 gen_rtx_AND (mode, one, tmp)));
29484 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29486 /* res = copysign (xa2, operand1) */
29487 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29489 emit_label (label);
29490 LABEL_NUSES (label) = 1;
29492 emit_move_insn (operand0, res);
29495 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29498 ix86_expand_trunc (rtx operand0, rtx operand1)
29500 /* C code for SSE variant we expand below.
29501 double xa = fabs (x), x2;
29502 if (!isless (xa, TWO52))
29504 x2 = (double)(long)x;
29505 if (HONOR_SIGNED_ZEROS (mode))
29506 return copysign (x2, x);
29509 enum machine_mode mode = GET_MODE (operand0);
29510 rtx xa, xi, TWO52, label, res, mask;
29512 TWO52 = ix86_gen_TWO52 (mode);
29514 /* Temporary for holding the result, initialized to the input
29515 operand to ease control flow.  */
29516 res = gen_reg_rtx (mode);
29517 emit_move_insn (res, operand1);
29519 /* xa = abs (operand1) */
29520 xa = ix86_expand_sse_fabs (res, &mask);
29522 /* if (!isless (xa, TWO52)) goto label; */
29523 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* cvttsd2si/cvttss2si truncate toward zero, which is exactly trunc();
   no compensation step is needed, unlike floor/ceil above.  */
29525 /* x = (double)(long)x */
29526 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29527 expand_fix (xi, res, 0);
29528 expand_float (res, xi, 0);
/* Restore the sign so trunc (-0.0) == -0.0 when that matters.  */
29530 if (HONOR_SIGNED_ZEROS (mode))
29531 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29533 emit_label (label);
29534 LABEL_NUSES (label) = 1;
29536 emit_move_insn (operand0, res);
29539 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29542 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29544 enum machine_mode mode = GET_MODE (operand0);
29545 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29547 /* C code for SSE variant we expand below.
29548 double xa = fabs (x), x2;
29549 if (!isless (xa, TWO52))
29551 xa2 = xa + TWO52 - TWO52;
29555 x2 = copysign (xa2, x);
/* 32-bit-safe trunc: round |x| with the TWO52 trick, then undo any
   upward rounding, then restore the sign.  */
29559 TWO52 = ix86_gen_TWO52 (mode);
29561 /* Temporary for holding the result, initialized to the input
29562 operand to ease control flow.  */
29563 res = gen_reg_rtx (mode);
29564 emit_move_insn (res, operand1);
29566 /* xa = abs (operand1) */
29567 xa = ix86_expand_sse_fabs (res, &smask);
29569 /* if (!isless (xa, TWO52)) goto label; */
29570 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29572 /* res = xa + TWO52 - TWO52; */
29573 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29574 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29575 emit_move_insn (res, tmp);
29578 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
/* Round-to-nearest may have rounded |x| up; subtract 1 in that case so
   the result is the truncation (floor of the absolute value).  */
29580 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29581 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29582 emit_insn (gen_rtx_SET (VOIDmode, mask,
29583 gen_rtx_AND (mode, mask, one)));
29584 tmp = expand_simple_binop (mode, MINUS,
29585 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29586 emit_move_insn (res, tmp);
29588 /* res = copysign (res, operand1) */
29589 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29591 emit_label (label);
29592 LABEL_NUSES (label) = 1;
29594 emit_move_insn (operand0, res);
29597 /* Expand SSE sequence for computing round from OPERAND1 storing
29600 ix86_expand_round (rtx operand0, rtx operand1)
29602 /* C code for the stuff we're doing below:
29603 double xa = fabs (x);
29604 if (!isless (xa, TWO52))
29606 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29607 return copysign (xa, x);
29609 enum machine_mode mode = GET_MODE (operand0);
29610 rtx res, TWO52, xa, label, xi, half, mask;
29611 const struct real_format *fmt;
29612 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29614 /* Temporary for holding the result, initialized to the input
29615 operand to ease control flow.  */
29616 res = gen_reg_rtx (mode);
29617 emit_move_insn (res, operand1);
29619 TWO52 = ix86_gen_TWO52 (mode);
29620 xa = ix86_expand_sse_fabs (res, &mask);
29621 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* Bias by the largest value below 0.5 (not 0.5 itself) so exact .5
   ties round away from zero without double rounding, as in
   ix86_expand_lround above.  */
29623 /* load nextafter (0.5, 0.0) */
29624 fmt = REAL_MODE_FORMAT (mode);
29625 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29626 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29628 /* xa = xa + 0.5 */
29629 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29630 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
/* Truncating fix/float pair; requires DImode conversion for DFmode.  */
29632 /* xa = (double)(int64_t)xa */
29633 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29634 expand_fix (xi, xa, 0);
29635 expand_float (xa, xi, 0);
29637 /* res = copysign (xa, operand1) */
29638 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29640 emit_label (label);
29641 LABEL_NUSES (label) = 1;
29643 emit_move_insn (operand0, res);
29647 /* Validate whether a SSE5 instruction is valid or not.
29648 OPERANDS is the array of operands.
29649 NUM is the number of operands.
29650 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29651 NUM_MEMORY is the maximum number of memory operands to accept.
29652 when COMMUTATIVE is set, operand 1 and 2 can be swapped.  */
29655 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29656 bool uses_oc0, int num_memory, bool commutative)
29662 /* Count the number of memory arguments */
/* MEM_MASK gets bit i set for each memory operand i; MEM_COUNT is the
   total.  */
29665 for (i = 0; i < num; i++)
29667 enum machine_mode mode = GET_MODE (operands[i]);
29668 if (register_operand (operands[i], mode))
29671 else if (memory_operand (operands[i], mode))
29673 mem_mask |= (1 << i);
29679 rtx pattern = PATTERN (insn);
29681 /* allow 0 for pcmov */
29682 if (GET_CODE (pattern) != SET
29683 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29685 || operands[i] != CONST0_RTX (mode))
29690 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29691 a memory operation.  */
29692 if (num_memory < 0)
29694 num_memory = -num_memory;
29695 if ((mem_mask & (1 << (num-1))) != 0)
29697 mem_mask &= ~(1 << (num-1));
29702 /* If there were no memory operations, allow the insn */
29706 /* Do not allow the destination register to be a memory operand.  */
29707 else if (mem_mask & (1 << 0))
29710 /* If there are too many memory operations, disallow the instruction.  While
29711 the hardware only allows 1 memory reference, before register allocation
29712 for some insns, we allow two memory operations sometimes in order to allow
29713 code like the following to be optimized:
29715 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29717 or similar cases that are vectorized into using the fmaddss
29719 else if (mem_count > num_memory)
29722 /* Don't allow more than one memory operation if not optimizing.  */
29723 else if (mem_count > 1 && !optimize)
29726 else if (num == 4 && mem_count == 1)
29728 /* formats (destination is the first argument), example fmaddss:
29729 xmm1, xmm1, xmm2, xmm3/mem
29730 xmm1, xmm1, xmm2/mem, xmm3
29731 xmm1, xmm2, xmm3/mem, xmm1
29732 xmm1, xmm2/mem, xmm3, xmm1 */
29734 return ((mem_mask == (1 << 1))
29735 || (mem_mask == (1 << 2))
29736 || (mem_mask == (1 << 3)));
29738 /* format, example pmacsdd:
29739 xmm1, xmm2, xmm3/mem, xmm1 */
29741 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29743 return (mem_mask == (1 << 2));
29746 else if (num == 4 && num_memory == 2)
29748 /* If there are two memory operations, we can load one of the memory ops
29749 into the destination register.  This is for optimizing the
29750 multiply/add ops, which the combiner has optimized both the multiply
29751 and the add insns to have a memory operation.  We have to be careful
29752 that the destination doesn't overlap with the inputs.  */
29753 rtx op0 = operands[0];
29755 if (reg_mentioned_p (op0, operands[1])
29756 || reg_mentioned_p (op0, operands[2])
29757 || reg_mentioned_p (op0, operands[3]))
29760 /* formats (destination is the first argument), example fmaddss:
29761 xmm1, xmm1, xmm2, xmm3/mem
29762 xmm1, xmm1, xmm2/mem, xmm3
29763 xmm1, xmm2, xmm3/mem, xmm1
29764 xmm1, xmm2/mem, xmm3, xmm1
29766 For the oc0 case, we will load either operands[1] or operands[3] into
29767 operands[0], so any combination of 2 memory operands is ok.  */
29771 /* format, example pmacsdd:
29772 xmm1, xmm2, xmm3/mem, xmm1
29774 For the integer multiply/add instructions be more restrictive and
29775 require operands[2] and operands[3] to be the memory operands.  */
/* NOTE(review): BUG -- the right operand of || below is missing its
   "mem_mask ==" comparison, so "((1 << 2) | (1 << 3))" is the nonzero
   constant 12 and the whole expression is always true.  Almost
   certainly intended:
     return (mem_mask == ((1 << 1) | (1 << 3))
	     || mem_mask == ((1 << 2) | (1 << 3)));
   Left unchanged here because this extraction is missing interior
   lines of the function; fix against the full source.  */
29777 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
29779 return (mem_mask == ((1 << 2) | (1 << 3)));
29782 else if (num == 3 && num_memory == 1)
29784 /* formats, example protb:
29785 xmm1, xmm2, xmm3/mem
29786 xmm1, xmm2/mem, xmm3 */
29788 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29790 /* format, example comeq:
29791 xmm1, xmm2, xmm3/mem */
29793 return (mem_mask == (1 << 2));
/* All valid NUM/NUM_MEMORY combinations are handled above.  */
29797 gcc_unreachable ();
29803 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
29804 hardware will allow by using the destination register to load one of the
29805 memory operations.  Presently this is used by the multiply/add routines to
29806 allow 2 memory references.  */
29809 ix86_expand_sse5_multiple_memory (rtx operands[],
29811 enum machine_mode mode)
29813 rtx op0 = operands[0];
/* Sanity checks: the destination must be a register that does not
   overlap any input, otherwise the preload below would clobber data.  */
29815 || memory_operand (op0, mode)
29816 || reg_mentioned_p (op0, operands[1])
29817 || reg_mentioned_p (op0, operands[2])
29818 || reg_mentioned_p (op0, operands[3])
29819 gcc_unreachable ();
29821 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29822 the destination register.  */
29823 if (memory_operand (operands[1], mode))
29825 emit_move_insn (op0, operands[1]);
29828 else if (memory_operand (operands[3], mode))
29830 emit_move_insn (op0, operands[3]);
/* Caller guaranteed two memory inputs, so one of the above matched.  */
29834 gcc_unreachable ();
29840 /* Table of valid machine attributes.  */
29841 static const struct attribute_spec ix86_attribute_table[] =
29843 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29844 /* Stdcall attribute says callee is responsible for popping arguments
29845 if they are not variable.  */
29846 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29847 /* Fastcall attribute says callee is responsible for popping arguments
29848 if they are not variable.  */
29849 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29850 /* Cdecl attribute says the callee is a normal C declaration */
29851 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29852 /* Regparm attribute specifies how many integer arguments are to be
29853 passed in registers.  */
29854 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29855 /* Sseregparm attribute says we are using x86_64 calling conventions
29856 for FP arguments.  */
29857 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29858 /* force_align_arg_pointer says this function realigns the stack at entry.  */
/* The attribute name is read from a variable so it can match the
   -mstackrealign option string.  */
29859 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29860 false, true, true, ix86_handle_cconv_attribute },
29861 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29862 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29863 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29864 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29866 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29867 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29868 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29869 SUBTARGET_ATTRIBUTE_TABLE,
29871 /* ms_abi and sysv_abi calling convention function attributes.  */
29872 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29873 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
/* Sentinel entry terminating the table.  */
29875 { NULL, 0, 0, false, false, false, NULL }
29878 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
29880 x86_builtin_vectorization_cost (bool runtime_test)
29882 /* If the branch of the runtime test is taken - i.e. - the vectorized
29883 version is skipped - this incurs a misprediction cost (because the
29884 vectorized version is expected to be the fall-through).  So we subtract
29885 the latency of a mispredicted branch from the costs that are incured
29886 when the vectorized version is executed.
29888 TODO: The values in individual target tables have to be tuned or new
29889 fields may be needed.  For eg. on K8, the default branch path is the
29890 not-taken path.  If the taken path is predicted correctly, the minimum
29891 penalty of going down the taken-path is 1 cycle.  If the taken-path is
29892 not predicted correctly, then the minimum penalty is 10 cycles.  */
/* Negative cost: a credit applied when RUNTIME_TEST is true.  */
29896 return (-(ix86_cost->cond_taken_branch_cost));
29902 /* This function returns the calling abi specific va_list type node.
29903 It returns the FNDECL specific va_list type.  */
29906 ix86_fn_abi_va_list (tree fndecl)
/* Early return (guard elided in this extraction) yields the generic
   va_list; only 64-bit targets distinguish MS and SysV variants.  */
29909 return va_list_type_node;
29910 gcc_assert (fndecl != NULL_TREE);
29912 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
29913 return ms_va_list_type_node;
29915 return sysv_va_list_type_node;
29918 /* Returns the canonical va_list type specified by TYPE.  If there
29919 is no valid TYPE provided, it return NULL_TREE.  */
29922 ix86_canonical_va_list_type (tree type)
29926 /* Resolve references and pointers to va_list type.  */
29927 if (INDIRECT_REF_P (type))
29928 type = TREE_TYPE (type);
29929 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29930 type = TREE_TYPE (type);
/* Try each candidate va_list node in turn: generic, then SysV, then
   MS.  NOTE(review): the three unwrap-and-compare stanzas below are
   identical; a helper would remove the triplication.  */
29934 wtype = va_list_type_node;
29935 gcc_assert (wtype != NULL_TREE);
29937 if (TREE_CODE (wtype) == ARRAY_TYPE)
29939 /* If va_list is an array type, the argument may have decayed
29940 to a pointer type, e.g. by being passed to another function.
29941 In that case, unwrap both types so that we can compare the
29942 underlying records.  */
29943 if (TREE_CODE (htype) == ARRAY_TYPE
29944 || POINTER_TYPE_P (htype))
29946 wtype = TREE_TYPE (wtype);
29947 htype = TREE_TYPE (htype);
29950 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29951 return va_list_type_node;
29952 wtype = sysv_va_list_type_node;
29953 gcc_assert (wtype != NULL_TREE);
29955 if (TREE_CODE (wtype) == ARRAY_TYPE)
29957 /* If va_list is an array type, the argument may have decayed
29958 to a pointer type, e.g. by being passed to another function.
29959 In that case, unwrap both types so that we can compare the
29960 underlying records.  */
29961 if (TREE_CODE (htype) == ARRAY_TYPE
29962 || POINTER_TYPE_P (htype))
29964 wtype = TREE_TYPE (wtype);
29965 htype = TREE_TYPE (htype);
29968 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29969 return sysv_va_list_type_node;
29970 wtype = ms_va_list_type_node;
29971 gcc_assert (wtype != NULL_TREE);
29973 if (TREE_CODE (wtype) == ARRAY_TYPE)
29975 /* If va_list is an array type, the argument may have decayed
29976 to a pointer type, e.g. by being passed to another function.
29977 In that case, unwrap both types so that we can compare the
29978 underlying records.  */
29979 if (TREE_CODE (htype) == ARRAY_TYPE
29980 || POINTER_TYPE_P (htype))
29982 wtype = TREE_TYPE (wtype);
29983 htype = TREE_TYPE (htype);
29986 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29987 return ms_va_list_type_node;
/* No target-specific match: defer to the language-independent
   canonicalization.  */
29990 return std_canonical_va_list_type (type);
29993 /* Iterate through the target-specific builtin types for va_list.
29994 IDX denotes the iterator, *PTREE is set to the result type of
29995 the va_list builtin, and *PNAME to its internal type.
29996 Returns zero if there is no element for this index, otherwise
29997 IDX should be increased upon the next call.
29998 Note, do not iterate a base builtin's name like __builtin_va_list.
29999 Used from c_common_nodes_and_builtins.  */
30002 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* idx 0 -> MS va_list, idx 1 -> SysV va_list (switch arms elided in
   this extraction).  */
30008 *ptree = ms_va_list_type_node;
30009 *pname = "__builtin_ms_va_list";
30012 *ptree = sysv_va_list_type_node;
30013 *pname = "__builtin_sysv_va_list";
30021 /* Initialize the GCC target structure.  */
/* Each TARGET_* macro below overrides a default hook in target-def.h;
   TARGET_INITIALIZER at the bottom collects them into targetm, the
   single target-hook vtable the middle end calls through.  */
30022 #undef TARGET_RETURN_IN_MEMORY
30023 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30025 #undef TARGET_ATTRIBUTE_TABLE
30026 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30027 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30028 #  undef TARGET_MERGE_DECL_ATTRIBUTES
30029 #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30032 #undef TARGET_COMP_TYPE_ATTRIBUTES
30033 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
30035 #undef TARGET_INIT_BUILTINS
30036 #define TARGET_INIT_BUILTINS ix86_init_builtins
30037 #undef TARGET_EXPAND_BUILTIN
30038 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30040 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30041 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30042 ix86_builtin_vectorized_function
30044 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30045 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30047 #undef TARGET_BUILTIN_RECIPROCAL
30048 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30050 #undef TARGET_ASM_FUNCTION_EPILOGUE
30051 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30053 #undef TARGET_ENCODE_SECTION_INFO
30054 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30055 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30057 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
30060 #undef TARGET_ASM_OPEN_PAREN
30061 #define TARGET_ASM_OPEN_PAREN ""
30062 #undef TARGET_ASM_CLOSE_PAREN
30063 #define TARGET_ASM_CLOSE_PAREN ""
30065 #undef TARGET_ASM_ALIGNED_HI_OP
30066 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30067 #undef TARGET_ASM_ALIGNED_SI_OP
30068 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30070 #undef TARGET_ASM_ALIGNED_DI_OP
30071 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
30074 #undef TARGET_ASM_UNALIGNED_HI_OP
30075 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30076 #undef TARGET_ASM_UNALIGNED_SI_OP
30077 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30078 #undef TARGET_ASM_UNALIGNED_DI_OP
30079 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
30081 #undef TARGET_SCHED_ADJUST_COST
30082 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30083 #undef TARGET_SCHED_ISSUE_RATE
30084 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30085 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30086 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30087 ia32_multipass_dfa_lookahead
30089 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30090 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30093 #undef TARGET_HAVE_TLS
30094 #define TARGET_HAVE_TLS true
30096 #undef TARGET_CANNOT_FORCE_CONST_MEM
30097 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30098 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30099 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30101 #undef TARGET_DELEGITIMIZE_ADDRESS
30102 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30104 #undef TARGET_MS_BITFIELD_LAYOUT_P
30105 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
30108 #undef TARGET_BINDS_LOCAL_P
30109 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30111 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30112 #undef TARGET_BINDS_LOCAL_P
30113 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30116 #undef TARGET_ASM_OUTPUT_MI_THUNK
30117 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30118 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30119 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30121 #undef TARGET_ASM_FILE_START
30122 #define TARGET_ASM_FILE_START x86_file_start
30124 #undef TARGET_DEFAULT_TARGET_FLAGS
30125 #define TARGET_DEFAULT_TARGET_FLAGS \
30127 | TARGET_SUBTARGET_DEFAULT \
30128 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30130 #undef TARGET_HANDLE_OPTION
30131 #define TARGET_HANDLE_OPTION ix86_handle_option
30133 #undef TARGET_RTX_COSTS
30134 #define TARGET_RTX_COSTS ix86_rtx_costs
30135 #undef TARGET_ADDRESS_COST
30136 #define TARGET_ADDRESS_COST ix86_address_cost
30138 #undef TARGET_FIXED_CONDITION_CODE_REGS
30139 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30140 #undef TARGET_CC_MODES_COMPATIBLE
30141 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30143 #undef TARGET_MACHINE_DEPENDENT_REORG
30144 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30146 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30147 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
30149 #undef TARGET_BUILD_BUILTIN_VA_LIST
30150 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30152 #undef TARGET_FN_ABI_VA_LIST
30153 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30155 #undef TARGET_CANONICAL_VA_LIST_TYPE
30156 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30158 #undef TARGET_EXPAND_BUILTIN_VA_START
30159 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30161 #undef TARGET_MD_ASM_CLOBBERS
30162 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
30164 #undef TARGET_PROMOTE_PROTOTYPES
30165 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30166 #undef TARGET_STRUCT_VALUE_RTX
30167 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30168 #undef TARGET_SETUP_INCOMING_VARARGS
30169 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30170 #undef TARGET_MUST_PASS_IN_STACK
30171 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30172 #undef TARGET_PASS_BY_REFERENCE
30173 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30174 #undef TARGET_INTERNAL_ARG_POINTER
30175 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30176 #undef TARGET_UPDATE_STACK_BOUNDARY
30177 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30178 #undef TARGET_GET_DRAP_RTX
30179 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30180 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
30181 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
30182 #undef TARGET_STRICT_ARGUMENT_NAMING
30183 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30185 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30186 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30188 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30189 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30191 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30192 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30194 #undef TARGET_C_MODE_FOR_SUFFIX
30195 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30198 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30199 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30202 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30203 #undef TARGET_INSERT_ATTRIBUTES
30204 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30207 #undef TARGET_MANGLE_TYPE
30208 #define TARGET_MANGLE_TYPE ix86_mangle_type
30210 #undef TARGET_STACK_PROTECT_FAIL
30211 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30213 #undef TARGET_FUNCTION_VALUE
30214 #define TARGET_FUNCTION_VALUE ix86_function_value
30216 #undef TARGET_SECONDARY_RELOAD
30217 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30219 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30220 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30222 #undef TARGET_SET_CURRENT_FUNCTION
30223 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30225 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30226 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30228 #undef TARGET_OPTION_SAVE
30229 #define TARGET_OPTION_SAVE ix86_function_specific_save
30231 #undef TARGET_OPTION_RESTORE
30232 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30234 #undef TARGET_OPTION_PRINT
30235 #define TARGET_OPTION_PRINT ix86_function_specific_print
30237 #undef TARGET_OPTION_CAN_INLINE_P
30238 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30240 #undef TARGET_EXPAND_TO_RTL_HOOK
30241 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
/* The one definition of the target hook vector for this backend.  */
30243 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30245 #include "gt-i386.h"