/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
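
/* Illustrative use (assuming the mult_init/divide array fields declared for
   struct processor_costs in i386.h): the RTX cost code selects a per-mode
   entry as, e.g., ix86_cost->mult_init[MODE_INDEX (mode)], with index 4
   covering everything wider than DImode.  */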

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
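/* Under that assumption the two scales line up: an add (2 bytes) costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so size-tuned and
   speed-tuned tables can be consumed by the same cost hooks.  */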

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
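
/* Reading the stringop tables below (a sketch of the layout, assuming the
   struct stringop_algs definition in i386.h): each memcpy/memset member is
   a pair of descriptors, [0] presumably used when compiling 32-bit code and
   [1] when compiling 64-bit code, which is why 32-bit-only chips use
   DUMMY_STRINGOP_ALGS for the second slot.  Each descriptor names the
   algorithm for unknown sizes, then {max, alg} pairs tried in order for
   known sizes, with max == -1 terminating the list.  */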

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),                    /* cost of an add instruction */
  COSTS_N_BYTES (3),                    /* cost of a lea instruction */
  COSTS_N_BYTES (2),                    /* variable shift costs */
  COSTS_N_BYTES (3),                    /* constant shift costs */
  {COSTS_N_BYTES (3),                   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),                   /*                               HI */
   COSTS_N_BYTES (3),                   /*                               SI */
   COSTS_N_BYTES (3),                   /*                               DI */
   COSTS_N_BYTES (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),                   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),                   /*                          HI */
   COSTS_N_BYTES (3),                   /*                          SI */
   COSTS_N_BYTES (3),                   /*                          DI */
   COSTS_N_BYTES (5)},                  /*                       other */
  COSTS_N_BYTES (3),                    /* cost of movsx */
  COSTS_N_BYTES (3),                    /* cost of movzx */
  0,                                    /* "large" insn */
  2,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {2, 2, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 2},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {2, 2, 2},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  3,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {3, 3},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  3,                                    /* cost of moving SSE register */
  {3, 3, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {3, 3, 3},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_BYTES (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),                    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),                    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  1,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  1,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (6),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),                   /*                               HI */
   COSTS_N_INSNS (6),                   /*                               SI */
   COSTS_N_INSNS (6),                   /*                               DI */
   COSTS_N_INSNS (6)},                  /*                            other */
  COSTS_N_INSNS (1),                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (23),                  /*                          SI */
   COSTS_N_INSNS (23),                  /*                          DI */
   COSTS_N_INSNS (23)},                 /*                       other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  0,                                    /* size of l1 cache */
  0,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (23),                   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),                   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),                  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (3),                    /* variable shift costs */
  COSTS_N_INSNS (2),                    /* constant shift costs */
  {COSTS_N_INSNS (12),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),                  /*                               HI */
   COSTS_N_INSNS (12),                  /*                               SI */
   COSTS_N_INSNS (12),                  /*                               DI */
   COSTS_N_INSNS (12)},                 /*                            other */
  1,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),                  /*                          HI */
   COSTS_N_INSNS (40),                  /*                          SI */
   COSTS_N_INSNS (40),                  /*                          DI */
   COSTS_N_INSNS (40)},                 /*                       other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  15,                                   /* "large" insn */
  3,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {8, 8, 8},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {8, 8, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  4,                                    /* size of l1 cache.  486 has 8kB cache
                                           shared for code and data, so 4kB is
                                           not really precise.  */
  4,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),                   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (11),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),                  /*                               HI */
   COSTS_N_INSNS (11),                  /*                               SI */
   COSTS_N_INSNS (11),                  /*                               DI */
   COSTS_N_INSNS (11)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),                  /*                          HI */
   COSTS_N_INSNS (25),                  /*                          SI */
   COSTS_N_INSNS (25),                  /*                          DI */
   COSTS_N_INSNS (25)},                 /*                       other */
  COSTS_N_INSNS (3),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  6,                                    /* cost for loading QImode using movzbl */
  {2, 4, 2},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 4, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  8,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 8, 16},                           /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 8, 16},                           /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  8,                                    /* size of l2 cache */
  0,                                    /* size of prefetch block */
  0,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (4),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (4),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (4)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),                  /*                          HI */
   COSTS_N_INSNS (17),                  /*                          SI */
   COSTS_N_INSNS (17),                  /*                          DI */
   COSTS_N_INSNS (17)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 2, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache */
  32,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (2),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (7),                   /*                               SI */
   COSTS_N_INSNS (7),                   /*                               DI */
   COSTS_N_INSNS (7)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),                  /*                          HI */
   COSTS_N_INSNS (39),                  /*                          SI */
   COSTS_N_INSNS (39),                  /*                          DI */
   COSTS_N_INSNS (39)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  1,                                    /* cost for loading QImode using movzbl */
  {1, 1, 1},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {1, 1, 1},                            /* cost of storing integer registers */
  1,                                    /* cost of reg,reg fld/fst */
  {1, 1, 1},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 6, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */

  1,                                    /* cost of moving MMX register */
  {1, 1},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {1, 1},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  1,                                    /* cost of moving SSE register */
  {1, 1, 1},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {1, 1, 1},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  1,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  128,                                  /* size of l2 cache.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),                   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),                  /*                          HI */
   COSTS_N_INSNS (18),                  /*                          SI */
   COSTS_N_INSNS (18),                  /*                          DI */
   COSTS_N_INSNS (18)},                 /*                       other */
  COSTS_N_INSNS (2),                    /* cost of movsx */
  COSTS_N_INSNS (2),                    /* cost of movzx */
  8,                                    /* "large" insn */
  4,                                    /* MOVE_RATIO */
  3,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {2, 2, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  6,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  32,                                   /* size of l2 cache.  Some models
                                           have integrated l2 cache, but
                                           optimizing for k6 is not important
                                           enough to worry about that.  */
  32,                                   /* size of prefetch block */
  1,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (2),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),                   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (5),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),                   /*                               HI */
   COSTS_N_INSNS (5),                   /*                               SI */
   COSTS_N_INSNS (5),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {4, 4},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  5,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 3, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  5,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  3,                                    /* vec_unalign_load_cost.  */
  3,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  2,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                          HI */
   COSTS_N_INSNS (51),                  /*                          SI */
   COSTS_N_INSNS (83),                  /*                          DI */
   COSTS_N_INSNS (83)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (5)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),                  /*                          HI */
   COSTS_N_INSNS (51),                  /*                          SI */
   COSTS_N_INSNS (83),                  /*                          DI */
   COSTS_N_INSNS (83)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  9,                                    /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {3, 4, 3},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {3, 4, 3},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {4, 4, 12},                           /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {3, 3},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {4, 4, 3},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 5},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  3,                                    /* MMX or SSE register to integer */
                                        /* On K8:
                                            MOVD reg64, xmmreg Double FSTORE 4
                                            MOVD reg32, xmmreg Double FSTORE 4
                                           On AMDFAM10:
                                            MOVD reg64, xmmreg Double FADD 3
                                                               1/1  1/1
                                            MOVD reg32, xmmreg Double FADD 3
                                                               1/1  1/1 */
  64,                                   /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                                  /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (4),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),                   /* cost of FSQRT instruction.  */

  /* BDVER1 has an optimized REP instruction for medium sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                                    /* scalar_stmt_cost.  */
  2,                                    /* scalar load_cost.  */
  2,                                    /* scalar_store_cost.  */
  6,                                    /* vec_stmt_cost.  */
  0,                                    /* vec_to_scalar_cost.  */
  2,                                    /* scalar_to_vec_cost.  */
  2,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  2,                                    /* vec_store_cost.  */
  2,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (3),                    /* cost of a lea instruction */
  COSTS_N_INSNS (4),                    /* variable shift costs */
  COSTS_N_INSNS (4),                    /* constant shift costs */
  {COSTS_N_INSNS (15),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),                  /*                               HI */
   COSTS_N_INSNS (15),                  /*                               SI */
   COSTS_N_INSNS (15),                  /*                               DI */
   COSTS_N_INSNS (15)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),                  /*                          HI */
   COSTS_N_INSNS (56),                  /*                          SI */
   COSTS_N_INSNS (56),                  /*                          DI */
   COSTS_N_INSNS (56)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  6,                                    /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 5, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {2, 3, 2},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {2, 2, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 6},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {2, 2},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {2, 2},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  12,                                   /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {2, 2, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  10,                                   /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  2,                                    /* Branch cost */
  COSTS_N_INSNS (5),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1),                    /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (10),                  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),                  /*                               HI */
   COSTS_N_INSNS (10),                  /*                               SI */
   COSTS_N_INSNS (10),                  /*                               DI */
   COSTS_N_INSNS (10)},                 /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),                  /*                          HI */
   COSTS_N_INSNS (66),                  /*                          SI */
   COSTS_N_INSNS (66),                  /*                          DI */
   COSTS_N_INSNS (66)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  16,                                   /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  3,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  6,                                    /* cost of moving MMX register */
  {12, 12},                             /* cost of loading MMX registers
                                           in SImode and DImode */
  {12, 12},                             /* cost of storing MMX registers
                                           in SImode and DImode */
  6,                                    /* cost of moving SSE register */
  {12, 12, 12},                         /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {12, 12, 12},                         /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  8,                                    /* MMX or SSE register to integer */
  8,                                    /* size of l1 cache.  */
  1024,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  1,                                    /* Branch cost */
  COSTS_N_INSNS (6),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),                   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
   {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (3),                   /*                               DI */
   COSTS_N_INSNS (3)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),                  /*                          HI */
   COSTS_N_INSNS (22),                  /*                          SI */
   COSTS_N_INSNS (22),                  /*                          DI */
   COSTS_N_INSNS (22)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  16,                                   /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {6, 6, 6},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  2,                                    /* cost of reg,reg fld/fst */
  {6, 6, 6},                            /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {4, 4, 4},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {6, 6},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {4, 4},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {6, 6, 6},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {4, 4, 4},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  2,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  2048,                                 /* size of l2 cache.  */
  128,                                  /* size of prefetch block */
  8,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (3),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),                   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  2,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  512,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),                    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,                /* cost of a lea instruction */
  COSTS_N_INSNS (1),                    /* variable shift costs */
  COSTS_N_INSNS (1),                    /* constant shift costs */
  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),                   /*                               HI */
   COSTS_N_INSNS (3),                   /*                               SI */
   COSTS_N_INSNS (4),                   /*                               DI */
   COSTS_N_INSNS (2)},                  /*                            other */
  0,                                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),                  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),                  /*                          HI */
   COSTS_N_INSNS (42),                  /*                          SI */
   COSTS_N_INSNS (74),                  /*                          DI */
   COSTS_N_INSNS (74)},                 /*                       other */
  COSTS_N_INSNS (1),                    /* cost of movsx */
  COSTS_N_INSNS (1),                    /* cost of movzx */
  8,                                    /* "large" insn */
  17,                                   /* MOVE_RATIO */
  4,                                    /* cost for loading QImode using movzbl */
  {4, 4, 4},                            /* cost of loading integer registers
                                           in QImode, HImode and SImode.
                                           Relative to reg-reg move (2).  */
  {4, 4, 4},                            /* cost of storing integer registers */
  4,                                    /* cost of reg,reg fld/fst */
  {12, 12, 12},                         /* cost of loading fp registers
                                           in SFmode, DFmode and XFmode */
  {6, 6, 8},                            /* cost of storing fp registers
                                           in SFmode, DFmode and XFmode */
  2,                                    /* cost of moving MMX register */
  {8, 8},                               /* cost of loading MMX registers
                                           in SImode and DImode */
  {8, 8},                               /* cost of storing MMX registers
                                           in SImode and DImode */
  2,                                    /* cost of moving SSE register */
  {8, 8, 8},                            /* cost of loading SSE registers
                                           in SImode, DImode and TImode */
  {8, 8, 8},                            /* cost of storing SSE registers
                                           in SImode, DImode and TImode */
  5,                                    /* MMX or SSE register to integer */
  32,                                   /* size of l1 cache.  */
  256,                                  /* size of l2 cache.  */
  64,                                   /* size of prefetch block */
  6,                                    /* number of parallel prefetches */
  3,                                    /* Branch cost */
  COSTS_N_INSNS (8),                    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),                    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),                   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),                    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),                   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                                    /* scalar_stmt_cost.  */
  1,                                    /* scalar load_cost.  */
  1,                                    /* scalar_store_cost.  */
  1,                                    /* vec_stmt_cost.  */
  1,                                    /* vec_to_scalar_cost.  */
  1,                                    /* scalar_to_vec_cost.  */
  1,                                    /* vec_align_load_cost.  */
  2,                                    /* vec_unalign_load_cost.  */
  1,                                    /* vec_store_cost.  */
  3,                                    /* cond_taken_branch_cost.  */
  1,                                    /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;
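
/* This default only stands in until tuning is selected; the option-override
   code later in the file presumably repoints ix86_cost at the table matching
   -mtune, or at ix86_size_cost when optimizing for size.  */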

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
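
/* A tuning bit is then tested by ANDing one of these masks against an entry
   of the table below; for instance (a purely illustrative check),
   (initial_ix86_tune_features[X86_TUNE_USE_LEAVE] & m_K8) != 0 asks whether
   the K8 tuning enables use of the leave instruction.  */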

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
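
/* How this array gets filled in (a sketch, not the verbatim option-override
   code): each entry is the bit for the active -mtune CPU masked out of the
   initializer table below, so the TARGET_* tuning macros in i386.h can test
   a plain char instead of recomputing the mask.  Assuming ix86_tune_mask is
   (1 << ix86_tune):

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/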
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
1492 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips, which treat 128-bit
1509 SSE registers as single units, and K8-based chips, which divide SSE
1510 registers into two 64-bit halves. This knob promotes all store
1511 destinations to 128 bits to allow register renaming on 128-bit SSE
1512 units, but usually results in one extra microop on 64-bit SSE units.
1513 Experimental results show that disabling this option on P4 causes a
1514 SPECfp regression of over 20%, while enabling it on K8 causes roughly
1515 a 2.4% regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format,
1531 leaving the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
1589 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1590 HImode and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1593 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is a
1594 vector path on AMD machines. */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1597 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD machines. */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1601 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV. */
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1606 but one byte longer. */
1609 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1610 operand that cannot be represented using a modRM byte. The XOR
1611 replacement is long decoded, so this split helps here as well. */
1614 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1628 will impact LEA instruction selection. */
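/* A minimal sketch of how the table above is consumed elsewhere in the
   backend: once override_options has filled ix86_tune_features from
   initial_ix86_tune_features, each knob is read back as a plain array
   element through TARGET_* convenience macros (the real macros live in
   i386.h; the name below is hypothetical).  */
#if 0
#define TARGET_USE_LEAVE_EXAMPLE ix86_tune_features[X86_TUNE_USE_LEAVE]
#endif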
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
1664 /* In case the average insn count for a single function invocation is
1665 lower than this constant, emit fast (but longer) prologue and epilogue code. */
1667 #define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
1706 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1714 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1717 /* The "default" register map used in 64bit mode. */
1719 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1726 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1727 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to under-
1759 stand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
1773 register numbers. Note that these are all stack-top-relative numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15)
1784 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1792 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
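/* Usage sketch (hypothetical helper, not part of this file): translating
   a GCC hard register number into a debug-info register number is a
   plain table lookup; which 32-bit table applies depends on how the
   subtarget defines DBX_REGISTER_NUMBER.  Under the SVR4 numbering
   above, GCC regno 1 (%edx) maps to DWARF regno 2, for example.  */
#if 0
static int
example_debug_regno (int gcc_regno)
{
  return TARGET_64BIT ? dbx64_register_map[gcc_regno]
		      : svr4_dbx_register_map[gcc_regno];
}
#endif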
1795 /* Test and compare insns in i386.md store the information needed to
1796 generate branch and scc insns here. */
1798 rtx ix86_compare_op0 = NULL_RTX;
1799 rtx ix86_compare_op1 = NULL_RTX;
1801 /* Define parameter passing and return registers. */
1803 static int const x86_64_int_parameter_registers[6] =
1805 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1808 static int const x86_64_ms_abi_int_parameter_registers[4] =
1810 CX_REG, DX_REG, R8_REG, R9_REG
1813 static int const x86_64_int_return_registers[4] =
1815 AX_REG, DX_REG, DI_REG, SI_REG
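/* A minimal sketch of how the two parameter-register tables are selected
   (hypothetical helper; the real logic lives in the argument
   classification code later in this file): the Nth integer argument
   takes the Nth entry of the active ABI's table, so the second integer
   argument lands in %rsi for SYSV_ABI but in %rdx for MS_ABI.  */
#if 0
static int
example_int_parameter_regno (enum calling_abi abi, int n)
{
  return abi == MS_ABI ? x86_64_ms_abi_int_parameter_registers[n]
		       : x86_64_int_parameter_registers[n];
}
#endif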
1818 /* Define the structure for the machine field in struct function. */
1820 struct GTY(()) stack_local_entry {
1821 unsigned short mode;
1824 struct stack_local_entry *next;
1827 /* Structure describing stack frame layout.
1828 Stack grows downward:

1834 saved frame pointer if frame_pointer_needed
1835 <- HARD_FRAME_POINTER
1844 [va_arg registers]
1845 > to_allocate <- FRAME_POINTER
*/
1858 HOST_WIDE_INT frame;
1860 int outgoing_arguments_size;
1862 HOST_WIDE_INT to_allocate;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1868 /* When save_regs_using_mov is set, emit prologue using
1869 move instead of push instructions. */
1870 bool save_regs_using_mov;
1873 /* Code model option. */
1874 enum cmodel ix86_cmodel;
1876 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1878 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1880 /* Which unit we are generating floating point math for. */
1881 enum fpmath_unit ix86_fpmath;
1883 /* Which cpu are we scheduling for. */
1884 enum attr_cpu ix86_schedule;
1886 /* Which cpu are we optimizing for. */
1887 enum processor_type ix86_tune;
1889 /* Which instruction set architecture to use. */
1890 enum processor_type ix86_arch;
1892 /* True if the SSE prefetch instruction is not a NOP. */
1893 int x86_prefetch_sse;
1895 /* ix86_regparm_string as a number */
1896 static int ix86_regparm;
1898 /* -mstackrealign option */
1899 extern int ix86_force_align_arg_pointer;
1900 static const char ix86_force_align_arg_pointer_string[]
1901 = "force_align_arg_pointer";
1903 static rtx (*ix86_gen_leave) (void);
1904 static rtx (*ix86_gen_pop1) (rtx);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits specified on the command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The ABI used by the target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling-ABI-specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing a given 64-bit part of the argument.
1951 These represent classes as documented by the PS ABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1953 just uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (the upper half does contain padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
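/* For illustration, per the x86-64 psABI: a 64-bit integer classifies as
   X86_64_INTEGER_CLASS, a 32-bit integer as X86_64_INTEGERSI_CLASS (so
   the cheaper SImode move can be used), a float as the SSESF class, a
   double as SSEDF, a long double as X87/X87UP, and a _Complex long
   double as X86_64_COMPLEX_X87_CLASS.  */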
1974 /* Table of constants used by fldpi, fldln2, etc. */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
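/* Sketch of the lazy-initialization contract for the table above
   (hypothetical accessor, assuming the init_ext_80387_constants helper
   defined later in this file; the real consumers are the
   standard_80387_constant_* routines): the table is filled on first
   use, and ext_80387_constants_init records that the fill happened.  */
#if 0
static const REAL_VALUE_TYPE *
example_ext_80387_constant (int idx)
{
  if (!ext_80387_constants_init)
    init_ext_80387_constants ();
  return &ext_80387_constants_table[idx];
}
#endif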
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1991 enum ix86_function_specific_strings
1993 IX86_FUNCTION_SPECIFIC_ARCH,
1994 IX86_FUNCTION_SPECIFIC_TUNE,
1995 IX86_FUNCTION_SPECIFIC_FPMATH,
1996 IX86_FUNCTION_SPECIFIC_MAX
1999 static char *ix86_target_string (int, int, const char *, const char *,
2000 const char *, bool);
2001 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2002 static void ix86_function_specific_save (struct cl_target_option *);
2003 static void ix86_function_specific_restore (struct cl_target_option *);
2004 static void ix86_function_specific_print (FILE *, int,
2005 struct cl_target_option *);
2006 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2007 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2008 static bool ix86_can_inline_p (tree, tree);
2009 static void ix86_set_current_function (tree);
2010 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2012 static enum calling_abi ix86_function_abi (const_tree);
2015 #ifndef SUBTARGET32_DEFAULT_CPU
2016 #define SUBTARGET32_DEFAULT_CPU "i386"
2019 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
2021 #ifndef DEFAULT_PCC_STRUCT_RETURN
2022 #define DEFAULT_PCC_STRUCT_RETURN 1
2025 /* Whether -mtune= or -march= were specified */
2026 static int ix86_tune_defaulted;
2027 static int ix86_arch_specified;
2029 /* Bit flags that specify the ISA we are compiling for. */
2030 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
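/* A minimal sketch of what the *_SET nesting buys us: enabling one ISA
   ORs in everything it implies, so the single (illustrative) statement
   below turns on SSE4.1 together with SSSE3, SSE3, SSE2 and SSE.  */
#if 0
  ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
#endif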
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET)
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
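/* Conversely, the *_UNSET chains cascade in the other direction:
   clearing an ISA also clears everything that depends on it.  The
   illustrative statement below, the core of -mno-sse2 handling, strips
   SSE3, SSSE3, SSE4.x, SSE4A, AVX and their dependents along with SSE2.  */
#if 0
  ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
#endif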
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number */
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Implement TARGET_HANDLE_OPTION. */
2213 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2220 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2233 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2249 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2250 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2254 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2267 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2275 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2280 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2288 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2293 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2306 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2314 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2319 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2327 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2332 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2333 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2340 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2345 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2346 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2351 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2352 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2356 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2357 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2363 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2364 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2368 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2369 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2376 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2381 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2382 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2389 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2394 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2395 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2402 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2407 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2408 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2415 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2420 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2421 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2428 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2433 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2434 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2441 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2446 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2447 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2454 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2459 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2460 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2467 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2472 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2473 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2480 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2485 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2486 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2493 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2498 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2499 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2506 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2511 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2512 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2519 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2524 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2525 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2532 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2537 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2538 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2545 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2550 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2551 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2560 /* Return a string that documents the current -m options. The caller is
2561 responsible for freeing the string. */
2564 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2565 const char *fpmath, bool add_nl_p)
2567 struct ix86_target_opts
2569 const char *option; /* option string */
2570 int mask; /* isa mask options */
2573 /* This table is ordered so that options like -msse4.2 that imply
2574 preceding options are matched first. */
2575 static struct ix86_target_opts isa_opts[] =
2577 { "-m64", OPTION_MASK_ISA_64BIT },
2578 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2579 { "-mfma", OPTION_MASK_ISA_FMA },
2580 { "-mxop", OPTION_MASK_ISA_XOP },
2581 { "-mlwp", OPTION_MASK_ISA_LWP },
2582 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2583 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2584 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2585 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2586 { "-msse3", OPTION_MASK_ISA_SSE3 },
2587 { "-msse2", OPTION_MASK_ISA_SSE2 },
2588 { "-msse", OPTION_MASK_ISA_SSE },
2589 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2590 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2591 { "-mmmx", OPTION_MASK_ISA_MMX },
2592 { "-mabm", OPTION_MASK_ISA_ABM },
2593 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2594 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2595 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2596 { "-maes", OPTION_MASK_ISA_AES },
2597 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2598 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2599 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2600 { "-mf16c", OPTION_MASK_ISA_F16C },
2604 static struct ix86_target_opts flag_opts[] =
2606 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2607 { "-m80387", MASK_80387 },
2608 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2609 { "-malign-double", MASK_ALIGN_DOUBLE },
2610 { "-mcld", MASK_CLD },
2611 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2612 { "-mieee-fp", MASK_IEEE_FP },
2613 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2614 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2615 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2616 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2617 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2618 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2619 { "-mno-red-zone", MASK_NO_RED_ZONE },
2620 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2621 { "-mrecip", MASK_RECIP },
2622 { "-mrtd", MASK_RTD },
2623 { "-msseregparm", MASK_SSEREGPARM },
2624 { "-mstack-arg-probe", MASK_STACK_PROBE },
2625 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2628 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2631 char target_other[40];
2640 memset (opts, '\0', sizeof (opts));
2642 /* Add -march= option. */
2645 opts[num][0] = "-march=";
2646 opts[num++][1] = arch;
2649 /* Add -mtune= option. */
2652 opts[num][0] = "-mtune=";
2653 opts[num++][1] = tune;
2656 /* Pick out the ISA options. */
2657 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2659 if ((isa & isa_opts[i].mask) != 0)
2661 opts[num++][0] = isa_opts[i].option;
2662 isa &= ~ isa_opts[i].mask;
2666 if (isa && add_nl_p)
2668 opts[num++][0] = isa_other;
2669 sprintf (isa_other, "(other isa: %#x)", isa);
2672 /* Add flag options. */
2673 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2675 if ((flags & flag_opts[i].mask) != 0)
2677 opts[num++][0] = flag_opts[i].option;
2678 flags &= ~ flag_opts[i].mask;
2682 if (flags && add_nl_p)
2684 opts[num++][0] = target_other;
2685 sprintf (target_other, "(other flags: %#x)", flags);
2688 /* Add -mfpmath= option. */
2691 opts[num][0] = "-mfpmath=";
2692 opts[num++][1] = fpmath;
2699 gcc_assert (num < ARRAY_SIZE (opts));
2701 /* Size the string. */
2703 sep_len = (add_nl_p) ? 3 : 1;
2704 for (i = 0; i < num; i++)
2707 for (j = 0; j < 2; j++)
2709 len += strlen (opts[i][j]);
2712 /* Build the string. */
2713 ret = ptr = (char *) xmalloc (len);
2716 for (i = 0; i < num; i++)
2720 for (j = 0; j < 2; j++)
2721 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2728 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2736 for (j = 0; j < 2; j++)
2739 memcpy (ptr, opts[i][j], len2[j]);
2741 line_len += len2[j];
2746 gcc_assert (ret + len >= ptr);
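/* Usage sketch (hypothetical caller): the returned buffer is allocated
   with xmalloc, so callers are responsible for freeing it, as
   ix86_debug_options below is expected to do.  */
#if 0
  char *s = ix86_target_string (ix86_isa_flags, target_flags,
				"k8", "generic", "sse", false);
  fputs (s, stderr);
  free (s);
#endif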
2751 /* Return TRUE if software prefetching is beneficial for the given CPU. */
2755 software_prefetching_beneficial_p (void)
2759 case PROCESSOR_GEODE:
2761 case PROCESSOR_ATHLON:
2763 case PROCESSOR_AMDFAM10:
2771 /* Return true if profiling code should be emitted before the
2772 prologue, and false otherwise.
2773 Note: for x86 with "hotfix" this is not supported (a sorry() is issued). */
2775 ix86_profile_before_prologue (void)
2777 return flag_fentry != 0;
2780 /* Function that is callable from the debugger to print the current options. */
2783 ix86_debug_options (void)
2785 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2786 ix86_arch_string, ix86_tune_string,
2787 ix86_fpmath_string, true);
2791 fprintf (stderr, "%s\n\n", opts);
2795 fputs ("<no options>\n\n", stderr);
2800 /* Sometimes certain combinations of command options do not make
2801 sense on a particular target machine. You can define a macro
2802 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2803 defined, is executed once just after all the command options have been parsed.
2806 Don't use this macro to turn on various extra optimizations for
2807 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2810 override_options (bool main_args_p)
2813 unsigned int ix86_arch_mask, ix86_tune_mask;
2814 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2819 /* Comes from final.c -- no real reason to change it. */
2820 #define MAX_CODE_ALIGN 16
2828 PTA_PREFETCH_SSE = 1 << 4,
2830 PTA_3DNOW_A = 1 << 6,
2834 PTA_POPCNT = 1 << 10,
2836 PTA_SSE4A = 1 << 12,
2837 PTA_NO_SAHF = 1 << 13,
2838 PTA_SSE4_1 = 1 << 14,
2839 PTA_SSE4_2 = 1 << 15,
2841 PTA_PCLMUL = 1 << 17,
2844 PTA_MOVBE = 1 << 20,
2848 PTA_FSGSBASE = 1 << 24,
2849 PTA_RDRND = 1 << 25,
2855 const char *const name; /* processor name or nickname. */
2856 const enum processor_type processor;
2857 const enum attr_cpu schedule;
2858 const unsigned /*enum pta_flags*/ flags;
2860 const processor_alias_table[] =
2862 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2863 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2864 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2865 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2866 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2867 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2868 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2869 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2870 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2871 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2872 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2873 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2874 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2876 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2878 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2879 PTA_MMX | PTA_SSE | PTA_SSE2},
2880 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2881 PTA_MMX |PTA_SSE | PTA_SSE2},
2882 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2883 PTA_MMX | PTA_SSE | PTA_SSE2},
2884 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2885 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2886 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2887 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2888 | PTA_CX16 | PTA_NO_SAHF},
2889 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2890 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2891 | PTA_SSSE3 | PTA_CX16},
2892 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2893 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2894 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2895 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2896 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2897 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2898 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2899 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2900 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2903 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2904 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2905 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2906 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2907 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2908 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2909 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2910 {"x86-64", PROCESSOR_K8, CPU_K8,
2911 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2912 {"k8", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2914 | PTA_SSE2 | PTA_NO_SAHF},
2915 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2918 {"opteron", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_NO_SAHF},
2921 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2924 {"athlon64", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_NO_SAHF},
2927 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2930 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_NO_SAHF},
2933 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2936 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2937 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2938 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2939 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2940 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2941 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2942 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2943 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2944 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2945 0 /* flags are only used for -march switch. */ },
2946 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2947 PTA_64BIT /* flags are only used for -march switch. */ },
2950 int const pta_size = ARRAY_SIZE (processor_alias_table);
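/* Both -march= and -mtune= are resolved by a linear scan of
   processor_alias_table, as the loops below do; e.g. "barcelona"
   resolves to PROCESSOR_AMDFAM10 with the same feature flags as
   "amdfam10".  */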
2952 /* Set up prefix/suffix so the error messages refer to either the command
2953 line argument, or the attribute(target). */
2962 prefix = "option(\"";
2967 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2968 SUBTARGET_OVERRIDE_OPTIONS;
2971 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2972 SUBSUBTARGET_OVERRIDE_OPTIONS;
2975 /* -fPIC is the default for x86_64. */
2976 if (TARGET_MACHO && TARGET_64BIT)
2979 /* Set the default values for switches whose default depends on TARGET_64BIT
2980 in case they weren't overwritten by command line options. */
2985 /* Mach-O doesn't support omitting the frame pointer for now. */
2986 if (flag_omit_frame_pointer == 2)
2987 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2988 if (flag_asynchronous_unwind_tables == 2)
2989 flag_asynchronous_unwind_tables = 1;
2990 if (flag_pcc_struct_return == 2)
2991 flag_pcc_struct_return = 0;
2997 if (flag_omit_frame_pointer == 2)
2998 flag_omit_frame_pointer = 0;
2999 if (flag_asynchronous_unwind_tables == 2)
3000 flag_asynchronous_unwind_tables = 0;
3001 if (flag_pcc_struct_return == 2)
3002 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3005 /* Need to check -mtune=generic first. */
3006 if (ix86_tune_string)
3008 if (!strcmp (ix86_tune_string, "generic")
3009 || !strcmp (ix86_tune_string, "i686")
3010 /* As special support for cross compilers we read -mtune=native
3011 as -mtune=generic. With native compilers we won't see
3012 -mtune=native, as the driver will already have changed it. */
3013 || !strcmp (ix86_tune_string, "native"))
3016 ix86_tune_string = "generic64";
3018 ix86_tune_string = "generic32";
3020 /* If this call is for setting the option attribute, allow the
3021 generic32/generic64 that was previously set. */
3022 else if (!main_args_p
3023 && (!strcmp (ix86_tune_string, "generic32")
3024 || !strcmp (ix86_tune_string, "generic64")))
3026 else if (!strncmp (ix86_tune_string, "generic", 7))
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3029 else if (!strcmp (ix86_tune_string, "x86-64"))
3030 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3031 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3032 prefix, suffix, prefix, suffix, prefix, suffix);
3036 if (ix86_arch_string)
3037 ix86_tune_string = ix86_arch_string;
3038 if (!ix86_tune_string)
3040 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3041 ix86_tune_defaulted = 1;
3044 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3045 need to use a sensible tune option. */
3046 if (!strcmp (ix86_tune_string, "generic")
3047 || !strcmp (ix86_tune_string, "x86-64")
3048 || !strcmp (ix86_tune_string, "i686"))
3051 ix86_tune_string = "generic64";
3053 ix86_tune_string = "generic32";
3057 if (ix86_stringop_string)
3059 if (!strcmp (ix86_stringop_string, "rep_byte"))
3060 stringop_alg = rep_prefix_1_byte;
3061 else if (!strcmp (ix86_stringop_string, "libcall"))
3062 stringop_alg = libcall;
3063 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3064 stringop_alg = rep_prefix_4_byte;
3065 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3067 /* rep; movq isn't available in 32-bit code. */
3068 stringop_alg = rep_prefix_8_byte;
3069 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3070 stringop_alg = loop_1_byte;
3071 else if (!strcmp (ix86_stringop_string, "loop"))
3072 stringop_alg = loop;
3073 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3074 stringop_alg = unrolled_loop;
3076 error ("bad value (%s) for %sstringop-strategy=%s %s",
3077 ix86_stringop_string, prefix, suffix, sw);
3080 if (!ix86_arch_string)
3081 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3083 ix86_arch_specified = 1;
3085 /* Validate -mabi= value. */
3086 if (ix86_abi_string)
3088 if (strcmp (ix86_abi_string, "sysv") == 0)
3089 ix86_abi = SYSV_ABI;
3090 else if (strcmp (ix86_abi_string, "ms") == 0)
3093 error ("unknown ABI (%s) for %sabi=%s %s",
3094 ix86_abi_string, prefix, suffix, sw);
3097 ix86_abi = DEFAULT_ABI;
3099 if (ix86_cmodel_string != 0)
3101 if (!strcmp (ix86_cmodel_string, "small"))
3102 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3103 else if (!strcmp (ix86_cmodel_string, "medium"))
3104 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3105 else if (!strcmp (ix86_cmodel_string, "large"))
3106 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3108 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3109 else if (!strcmp (ix86_cmodel_string, "32"))
3110 ix86_cmodel = CM_32;
3111 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3112 ix86_cmodel = CM_KERNEL;
3114 error ("bad value (%s) for %scmodel=%s %s",
3115 ix86_cmodel_string, prefix, suffix, sw);
3119 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3120 use of rip-relative addressing. This eliminates fixups that
3121 would otherwise be needed if this object is to be placed in a
3122 DLL, and is essentially just as efficient as direct addressing. */
3123 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3124 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3125 else if (TARGET_64BIT)
3126 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3128 ix86_cmodel = CM_32;
3130 if (ix86_asm_string != 0)
3133 && !strcmp (ix86_asm_string, "intel"))
3134 ix86_asm_dialect = ASM_INTEL;
3135 else if (!strcmp (ix86_asm_string, "att"))
3136 ix86_asm_dialect = ASM_ATT;
3138 error ("bad value (%s) for %sasm=%s %s",
3139 ix86_asm_string, prefix, suffix, sw);
3141 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3142 error ("code model %qs not supported in the %s bit mode",
3143 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3144 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3145 sorry ("%i-bit mode not compiled in",
3146 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3148 for (i = 0; i < pta_size; i++)
3149 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3151 ix86_schedule = processor_alias_table[i].schedule;
3152 ix86_arch = processor_alias_table[i].processor;
3153 /* Default cpu tuning to the architecture. */
3154 ix86_tune = ix86_arch;
3156 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3157 error ("CPU you selected does not support x86-64 "
3160 if (processor_alias_table[i].flags & PTA_MMX
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3162 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3163 if (processor_alias_table[i].flags & PTA_3DNOW
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3165 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3166 if (processor_alias_table[i].flags & PTA_3DNOW_A
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3168 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3169 if (processor_alias_table[i].flags & PTA_SSE
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3172 if (processor_alias_table[i].flags & PTA_SSE2
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3175 if (processor_alias_table[i].flags & PTA_SSE3
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3177 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3178 if (processor_alias_table[i].flags & PTA_SSSE3
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3180 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3181 if (processor_alias_table[i].flags & PTA_SSE4_1
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3184 if (processor_alias_table[i].flags & PTA_SSE4_2
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3186 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3187 if (processor_alias_table[i].flags & PTA_AVX
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3189 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3190 if (processor_alias_table[i].flags & PTA_FMA
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3192 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3193 if (processor_alias_table[i].flags & PTA_SSE4A
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3196 if (processor_alias_table[i].flags & PTA_FMA4
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3198 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3199 if (processor_alias_table[i].flags & PTA_XOP
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3201 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3202 if (processor_alias_table[i].flags & PTA_LWP
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3204 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3205 if (processor_alias_table[i].flags & PTA_ABM
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3207 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3208 if (processor_alias_table[i].flags & PTA_CX16
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3210 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3211 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3213 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3214 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3215 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3216 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3217 if (processor_alias_table[i].flags & PTA_MOVBE
3218 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3219 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3220 if (processor_alias_table[i].flags & PTA_AES
3221 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3222 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3223 if (processor_alias_table[i].flags & PTA_PCLMUL
3224 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3225 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3226 if (processor_alias_table[i].flags & PTA_FSGSBASE
3227 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3228 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3229 if (processor_alias_table[i].flags & PTA_RDRND
3230 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3231 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3232 if (processor_alias_table[i].flags & PTA_F16C
3233 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3234 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3235 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3236 x86_prefetch_sse = true;
3241 if (!strcmp (ix86_arch_string, "generic"))
3242 error ("generic CPU can be used only for %stune=%s %s",
3243 prefix, suffix, sw);
3244 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3245 error ("bad value (%s) for %sarch=%s %s",
3246 ix86_arch_string, prefix, suffix, sw);
3248 ix86_arch_mask = 1u << ix86_arch;
3249 for (i = 0; i < X86_ARCH_LAST; ++i)
3250 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3252 for (i = 0; i < pta_size; i++)
3253 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3255 ix86_schedule = processor_alias_table[i].schedule;
3256 ix86_tune = processor_alias_table[i].processor;
3257 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3259 if (ix86_tune_defaulted)
3261 ix86_tune_string = "x86-64";
3262 for (i = 0; i < pta_size; i++)
3263 if (! strcmp (ix86_tune_string,
3264 processor_alias_table[i].name))
3266 ix86_schedule = processor_alias_table[i].schedule;
3267 ix86_tune = processor_alias_table[i].processor;
3270 error ("CPU you selected does not support x86-64 "
3273 /* Intel CPUs have always interpreted SSE prefetch instructions as
3274 NOPs; so, we can enable SSE prefetch instructions even when
3275 -mtune (rather than -march) points us to a processor that has them.
3276 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3277 higher processors. */
3279 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3280 x86_prefetch_sse = true;
3284 if (ix86_tune_specified && i == pta_size)
3285 error ("bad value (%s) for %stune=%s %s",
3286 ix86_tune_string, prefix, suffix, sw);
3288 ix86_tune_mask = 1u << ix86_tune;
3289 for (i = 0; i < X86_TUNE_LAST; ++i)
3290 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3293 ix86_cost = &ix86_size_cost;
3295 ix86_cost = processor_target_table[ix86_tune].cost;
3297 /* Arrange to set up i386_stack_locals for all functions. */
3298 init_machine_status = ix86_init_machine_status;
3300 /* Validate -mregparm= value. */
3301 if (ix86_regparm_string)
3304 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3305 i = atoi (ix86_regparm_string);
3306 if (i < 0 || i > REGPARM_MAX)
3307 error ("%sregparm=%d%s is not between 0 and %d",
3308 prefix, i, suffix, REGPARM_MAX);
3313 ix86_regparm = REGPARM_MAX;
3315 /* If the user has provided any of the -malign-* options,
3316 warn and use that value only if -falign-* is not set.
3317 Remove this code in GCC 3.2 or later. */
3318 if (ix86_align_loops_string)
3320 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3321 prefix, suffix, suffix);
3322 if (align_loops == 0)
3324 i = atoi (ix86_align_loops_string);
3325 if (i < 0 || i > MAX_CODE_ALIGN)
3326 error ("%salign-loops=%d%s is not between 0 and %d",
3327 prefix, i, suffix, MAX_CODE_ALIGN);
3329 align_loops = 1 << i;
3333 if (ix86_align_jumps_string)
3335 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3336 prefix, suffix, suffix);
3337 if (align_jumps == 0)
3339 i = atoi (ix86_align_jumps_string);
3340 if (i < 0 || i > MAX_CODE_ALIGN)
3341 error ("%salign-loops=%d%s is not between 0 and %d",
3342 prefix, i, suffix, MAX_CODE_ALIGN);
3344 align_jumps = 1 << i;
3348 if (ix86_align_funcs_string)
3350 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3351 prefix, suffix, suffix);
3352 if (align_functions == 0)
3354 i = atoi (ix86_align_funcs_string);
3355 if (i < 0 || i > MAX_CODE_ALIGN)
3356 error ("%salign-loops=%d%s is not between 0 and %d",
3357 prefix, i, suffix, MAX_CODE_ALIGN);
3359 align_functions = 1 << i;
3363 /* Default align_* from the processor table. */
3364 if (align_loops == 0)
3366 align_loops = processor_target_table[ix86_tune].align_loop;
3367 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3369 if (align_jumps == 0)
3371 align_jumps = processor_target_table[ix86_tune].align_jump;
3372 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3374 if (align_functions == 0)
3376 align_functions = processor_target_table[ix86_tune].align_func;
3379 /* Validate -mbranch-cost= value, or provide default. */
3380 ix86_branch_cost = ix86_cost->branch_cost;
3381 if (ix86_branch_cost_string)
3383 i = atoi (ix86_branch_cost_string);
3385 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3387 ix86_branch_cost = i;
3389 if (ix86_section_threshold_string)
3391 i = atoi (ix86_section_threshold_string);
3393 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3395 ix86_section_threshold = i;
3398 if (ix86_tls_dialect_string)
3400 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3401 ix86_tls_dialect = TLS_DIALECT_GNU;
3402 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3403 ix86_tls_dialect = TLS_DIALECT_GNU2;
3405 error ("bad value (%s) for %stls-dialect=%s %s",
3406 ix86_tls_dialect_string, prefix, suffix, sw);
3409 if (ix87_precision_string)
3411 i = atoi (ix87_precision_string);
3412 if (i != 32 && i != 64 && i != 80)
3413 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3418 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3420 /* Enable by default the SSE and MMX builtins. Do allow the user to
3421 explicitly disable any of these. In particular, disabling SSE and
3422 MMX for kernel code is extremely useful. */
3423 if (!ix86_arch_specified)
3425 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3426 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3429 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3433 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3435 if (!ix86_arch_specified)
3437 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3439 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3440 when the programmer takes care to keep the stack from being destroyed. */
3441 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3442 target_flags |= MASK_NO_RED_ZONE;
3445 /* Keep nonleaf frame pointers. */
3446 if (flag_omit_frame_pointer)
3447 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3448 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3449 flag_omit_frame_pointer = 1;
3451 /* If we're doing fast math, we don't care about comparison order
3452 wrt NaNs. This lets us use a shorter comparison sequence. */
3453 if (flag_finite_math_only)
3454 target_flags &= ~MASK_IEEE_FP;
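/* Illustrative note (not from the original sources): without MASK_IEEE_FP a
   float compare can be a single ucomiss/fcomi plus one jcc, whereas the
   IEEE-safe sequence must additionally test the parity flag to catch
   unordered (NaN) results.  */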
3456 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3457 since the insns won't need emulation. */
3458 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3459 target_flags &= ~MASK_NO_FANCY_MATH_387;
3461 /* Likewise, if the target doesn't have a 387, or we've specified
3462 software floating point, don't use 387 inline intrinsics. */
3464 target_flags |= MASK_NO_FANCY_MATH_387;
3466 /* Turn on MMX builtins for -msse. */
3469 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3470 x86_prefetch_sse = true;
3473 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3474 if (TARGET_SSE4_2 || TARGET_ABM)
3475 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3477 /* Validate -mpreferred-stack-boundary= value or default it to
3478 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3479 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3480 if (ix86_preferred_stack_boundary_string)
3482 i = atoi (ix86_preferred_stack_boundary_string);
3483 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3484 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3485 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3487 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
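/* Example of the computation above: -mpreferred-stack-boundary=4 gives
   (1 << 4) * 8 = 128 bits, i.e. the 16-byte alignment assumed by SSE code.  */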
3490 /* Set the default value for -mstackrealign. */
3491 if (ix86_force_align_arg_pointer == -1)
3492 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3494 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3496 /* Validate -mincoming-stack-boundary= value or default it to
3497 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3498 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3499 if (ix86_incoming_stack_boundary_string)
3501 i = atoi (ix86_incoming_stack_boundary_string);
3502 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3503 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3504 i, TARGET_64BIT ? 4 : 2);
3507 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3508 ix86_incoming_stack_boundary
3509 = ix86_user_incoming_stack_boundary;
3513 /* Accept -msseregparm only if at least SSE support is enabled. */
3514 if (TARGET_SSEREGPARM
3516 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3518 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3519 if (ix86_fpmath_string != 0)
3521 if (! strcmp (ix86_fpmath_string, "387"))
3522 ix86_fpmath = FPMATH_387;
3523 else if (! strcmp (ix86_fpmath_string, "sse"))
3527 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3528 ix86_fpmath = FPMATH_387;
3531 ix86_fpmath = FPMATH_SSE;
3533 else if (! strcmp (ix86_fpmath_string, "387,sse")
3534 || ! strcmp (ix86_fpmath_string, "387+sse")
3535 || ! strcmp (ix86_fpmath_string, "sse,387")
3536 || ! strcmp (ix86_fpmath_string, "sse+387")
3537 || ! strcmp (ix86_fpmath_string, "both"))
3541 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3542 ix86_fpmath = FPMATH_387;
3544 else if (!TARGET_80387)
3546 warning (0, "387 instruction set disabled, using SSE arithmetic");
3547 ix86_fpmath = FPMATH_SSE;
3550 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3553 error ("bad value (%s) for %sfpmath=%s %s",
3554 ix86_fpmath_string, prefix, suffix, sw);
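/* Summary of the -mfpmath= spellings parsed above: "387", "sse", and the
   combined forms "387,sse", "387+sse", "sse,387", "sse+387" and "both",
   which request FPMATH_SSE | FPMATH_387 when both units are available.  */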
3557 /* If the i387 is disabled, then do not return values in it. */
3559 target_flags &= ~MASK_FLOAT_RETURNS;
3561 /* Use an external vectorized library when vectorizing intrinsics. */
3562 if (ix86_veclibabi_string)
3564 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3565 ix86_veclib_handler = ix86_veclibabi_svml;
3566 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3567 ix86_veclib_handler = ix86_veclibabi_acml;
3569 error ("unknown vectorization library ABI type (%s) for "
3570 "%sveclibabi=%s %s", ix86_veclibabi_string,
3571 prefix, suffix, sw);
3574 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3575 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3577 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3579 /* ??? Unwind info is not correct around the CFG unless either a frame
3580 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3581 unwind info generation to be aware of the CFG and propagating states around edges. */
3583 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3584 || flag_exceptions || flag_non_call_exceptions)
3585 && flag_omit_frame_pointer
3586 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3588 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3589 warning (0, "unwind tables currently require either a frame pointer "
3590 "or %saccumulate-outgoing-args%s for correctness",
3592 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3595 /* If stack probes are required, the space used for large function
3596 arguments on the stack must also be probed, so enable
3597 -maccumulate-outgoing-args so this happens in the prologue. */
3598 if (TARGET_STACK_PROBE
3599 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3601 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3602 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3603 "for correctness", prefix, suffix);
3604 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3607 /* For sane SSE instruction set generation we need the fcomi instruction.
3608 It is safe to enable all CMOVE instructions. */
3612 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3615 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3616 p = strchr (internal_label_prefix, 'X');
3617 internal_label_prefix_len = p - internal_label_prefix;
3621 /* When the scheduling description is not available, disable the scheduler
3622 pass so it won't slow down compilation and make x87 code slower. */
3623 if (!TARGET_SCHEDULE)
3624 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3626 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3627 set_param_value ("simultaneous-prefetches",
3628 ix86_cost->simultaneous_prefetches);
3629 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3630 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3631 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3632 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3633 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3634 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3636 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3637 if (flag_prefetch_loop_arrays < 0
3640 && software_prefetching_beneficial_p ())
3641 flag_prefetch_loop_arrays = 1;
3643 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3644 can be optimized to ap = __builtin_next_arg (0). */
3646 targetm.expand_builtin_va_start = NULL;
3650 ix86_gen_leave = gen_leave_rex64;
3651 ix86_gen_pop1 = gen_popdi1;
3652 ix86_gen_add3 = gen_adddi3;
3653 ix86_gen_sub3 = gen_subdi3;
3654 ix86_gen_sub3_carry = gen_subdi3_carry;
3655 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3656 ix86_gen_monitor = gen_sse3_monitor64;
3657 ix86_gen_andsp = gen_anddi3;
3658 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3659 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3660 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3664 ix86_gen_leave = gen_leave;
3665 ix86_gen_pop1 = gen_popsi1;
3666 ix86_gen_add3 = gen_addsi3;
3667 ix86_gen_sub3 = gen_subsi3;
3668 ix86_gen_sub3_carry = gen_subsi3_carry;
3669 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3670 ix86_gen_monitor = gen_sse3_monitor;
3671 ix86_gen_andsp = gen_andsi3;
3672 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3673 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3674 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
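/* The two branches above select the DImode or SImode expander variants
   once, so later code can emit word-size RTL through these ix86_gen_*
   hooks without re-testing TARGET_64BIT at every use.  */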
3678 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3680 target_flags |= MASK_CLD & ~target_flags_explicit;
3683 if (!TARGET_64BIT && flag_pic)
3685 if (flag_fentry > 0)
3686 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3689 if (flag_fentry < 0)
3691 #if defined(PROFILE_BEFORE_PROLOGUE)
3698 /* Save the initial options in case the user uses function-specific options. */
3700 target_option_default_node = target_option_current_node
3701 = build_target_option_node ();
3704 /* Update register usage after having seen the compiler flags. */
3707 ix86_conditional_register_usage (void)
3712 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3714 if (fixed_regs[i] > 1)
3715 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3716 if (call_used_regs[i] > 1)
3717 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3720 /* The PIC register, if it exists, is fixed. */
3721 j = PIC_OFFSET_TABLE_REGNUM;
3722 if (j != INVALID_REGNUM)
3723 fixed_regs[j] = call_used_regs[j] = 1;
3725 /* The MS_ABI changes the set of call-used registers. */
3726 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3728 call_used_regs[SI_REG] = 0;
3729 call_used_regs[DI_REG] = 0;
3730 call_used_regs[XMM6_REG] = 0;
3731 call_used_regs[XMM7_REG] = 0;
3732 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3733 call_used_regs[i] = 0;
3736 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3737 other call-clobbered regs for 64-bit. */
3740 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3742 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3743 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3744 && call_used_regs[i])
3745 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3748 /* If MMX is disabled, squash the registers. */
3750 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3751 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3752 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3754 /* If SSE is disabled, squash the registers. */
3756 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3757 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3758 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3760 /* If the FPU is disabled, squash the registers. */
3761 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3762 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3763 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3764 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3766 /* If 32-bit, squash the 64-bit registers. */
3769 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3771 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3777 /* Save the current options. */
3780 ix86_function_specific_save (struct cl_target_option *ptr)
3782 ptr->arch = ix86_arch;
3783 ptr->schedule = ix86_schedule;
3784 ptr->tune = ix86_tune;
3785 ptr->fpmath = ix86_fpmath;
3786 ptr->branch_cost = ix86_branch_cost;
3787 ptr->tune_defaulted = ix86_tune_defaulted;
3788 ptr->arch_specified = ix86_arch_specified;
3789 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3790 ptr->target_flags_explicit = target_flags_explicit;
3792 /* The fields are char but the variables are not; make sure the
3793 values fit in the fields. */
3794 gcc_assert (ptr->arch == ix86_arch);
3795 gcc_assert (ptr->schedule == ix86_schedule);
3796 gcc_assert (ptr->tune == ix86_tune);
3797 gcc_assert (ptr->fpmath == ix86_fpmath);
3798 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3801 /* Restore the current options. */
3804 ix86_function_specific_restore (struct cl_target_option *ptr)
3806 enum processor_type old_tune = ix86_tune;
3807 enum processor_type old_arch = ix86_arch;
3808 unsigned int ix86_arch_mask, ix86_tune_mask;
3811 ix86_arch = (enum processor_type) ptr->arch;
3812 ix86_schedule = (enum attr_cpu) ptr->schedule;
3813 ix86_tune = (enum processor_type) ptr->tune;
3814 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3815 ix86_branch_cost = ptr->branch_cost;
3816 ix86_tune_defaulted = ptr->tune_defaulted;
3817 ix86_arch_specified = ptr->arch_specified;
3818 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3819 target_flags_explicit = ptr->target_flags_explicit;
3821 /* Recreate the arch feature tests if the arch changed. */
3822 if (old_arch != ix86_arch)
3824 ix86_arch_mask = 1u << ix86_arch;
3825 for (i = 0; i < X86_ARCH_LAST; ++i)
3826 ix86_arch_features[i]
3827 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3830 /* Recreate the tune optimization tests. */
3831 if (old_tune != ix86_tune)
3833 ix86_tune_mask = 1u << ix86_tune;
3834 for (i = 0; i < X86_TUNE_LAST; ++i)
3835 ix86_tune_features[i]
3836 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3840 /* Print the current options. */
3843 ix86_function_specific_print (FILE *file, int indent,
3844 struct cl_target_option *ptr)
3847 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3848 NULL, NULL, NULL, false);
3850 fprintf (file, "%*sarch = %d (%s)\n",
3853 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3854 ? cpu_names[ptr->arch]
3857 fprintf (file, "%*stune = %d (%s)\n",
3860 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3861 ? cpu_names[ptr->tune]
3864 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3865 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3866 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3867 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3871 fprintf (file, "%*s%s\n", indent, "", target_string);
3872 free (target_string);
3877 /* Inner function to process the attribute((target(...))): take an argument and
3878 set the current options from the argument. If we have a list, recursively go over the list. */
3882 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3887 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3888 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3889 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3890 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3905 enum ix86_opt_type type;
3910 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3911 IX86_ATTR_ISA ("abm", OPT_mabm),
3912 IX86_ATTR_ISA ("aes", OPT_maes),
3913 IX86_ATTR_ISA ("avx", OPT_mavx),
3914 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3915 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3916 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3917 IX86_ATTR_ISA ("sse", OPT_msse),
3918 IX86_ATTR_ISA ("sse2", OPT_msse2),
3919 IX86_ATTR_ISA ("sse3", OPT_msse3),
3920 IX86_ATTR_ISA ("sse4", OPT_msse4),
3921 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3922 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3923 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3924 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3925 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3926 IX86_ATTR_ISA ("xop", OPT_mxop),
3927 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3928 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3929 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3930 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3932 /* string options */
3933 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3934 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3935 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3938 IX86_ATTR_YES ("cld",
3942 IX86_ATTR_NO ("fancy-math-387",
3943 OPT_mfancy_math_387,
3944 MASK_NO_FANCY_MATH_387),
3946 IX86_ATTR_YES ("ieee-fp",
3950 IX86_ATTR_YES ("inline-all-stringops",
3951 OPT_minline_all_stringops,
3952 MASK_INLINE_ALL_STRINGOPS),
3954 IX86_ATTR_YES ("inline-stringops-dynamically",
3955 OPT_minline_stringops_dynamically,
3956 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3958 IX86_ATTR_NO ("align-stringops",
3959 OPT_mno_align_stringops,
3960 MASK_NO_ALIGN_STRINGOPS),
3962 IX86_ATTR_YES ("recip",
3968 /* If this is a list, recurse to get the options. */
3969 if (TREE_CODE (args) == TREE_LIST)
3973 for (; args; args = TREE_CHAIN (args))
3974 if (TREE_VALUE (args)
3975 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3981 else if (TREE_CODE (args) != STRING_CST)
3984 /* Handle multiple arguments separated by commas. */
3985 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3987 while (next_optstr && *next_optstr != '\0')
3989 char *p = next_optstr;
3991 char *comma = strchr (next_optstr, ',');
3992 const char *opt_string;
3993 size_t len, opt_len;
3998 enum ix86_opt_type type = ix86_opt_unknown;
4004 len = comma - next_optstr;
4005 next_optstr = comma + 1;
4013 /* Recognize no-xxx. */
4014 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4023 /* Find the option. */
4026 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4028 type = attrs[i].type;
4029 opt_len = attrs[i].len;
4030 if (ch == attrs[i].string[0]
4031 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4032 && memcmp (p, attrs[i].string, opt_len) == 0)
4035 mask = attrs[i].mask;
4036 opt_string = attrs[i].string;
4041 /* Process the option. */
4044 error ("attribute(target(\"%s\")) is unknown", orig_p);
4048 else if (type == ix86_opt_isa)
4049 ix86_handle_option (opt, p, opt_set_p);
4051 else if (type == ix86_opt_yes || type == ix86_opt_no)
4053 if (type == ix86_opt_no)
4054 opt_set_p = !opt_set_p;
4057 target_flags |= mask;
4059 target_flags &= ~mask;
4062 else if (type == ix86_opt_str)
4066 error ("option(\"%s\") was already specified", opt_string);
4070 p_strings[opt] = xstrdup (p + opt_len);
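/* Illustrative input for the parser above (hypothetical attribute string):
     __attribute__((target("sse4.2,no-3dnow,arch=core2")))
   is split at the commas; a "no-" prefix flips opt_set_p, ISA names are
   routed through ix86_handle_option, and string options such as "arch="
   are stored in p_strings for later use.  */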
4080 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4083 ix86_valid_target_attribute_tree (tree args)
4085 const char *orig_arch_string = ix86_arch_string;
4086 const char *orig_tune_string = ix86_tune_string;
4087 const char *orig_fpmath_string = ix86_fpmath_string;
4088 int orig_tune_defaulted = ix86_tune_defaulted;
4089 int orig_arch_specified = ix86_arch_specified;
4090 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4093 struct cl_target_option *def
4094 = TREE_TARGET_OPTION (target_option_default_node);
4096 /* Process each of the options on the chain. */
4097 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4100 /* If the changed options are different from the default, rerun override_options,
4101 and then save the options away. The string options are attribute options,
4102 and will be undone when we copy the save structure. */
4103 if (ix86_isa_flags != def->ix86_isa_flags
4104 || target_flags != def->target_flags
4105 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4106 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4107 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4109 /* If we are using the default tune= or arch=, undo the string assigned,
4110 and use the default. */
4111 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4112 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4113 else if (!orig_arch_specified)
4114 ix86_arch_string = NULL;
4116 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4117 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4118 else if (orig_tune_defaulted)
4119 ix86_tune_string = NULL;
4121 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4122 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4123 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4124 else if (!TARGET_64BIT && TARGET_SSE)
4125 ix86_fpmath_string = "sse,387";
4127 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4128 override_options (false);
4130 /* Add any builtin functions with the new isa if any. */
4131 ix86_add_new_builtins (ix86_isa_flags);
4133 /* Save the current options unless we are validating options for #pragma. */
4135 t = build_target_option_node ();
4137 ix86_arch_string = orig_arch_string;
4138 ix86_tune_string = orig_tune_string;
4139 ix86_fpmath_string = orig_fpmath_string;
4141 /* Free up memory allocated to hold the strings. */
4142 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4143 if (option_strings[i])
4144 free (option_strings[i]);
4150 /* Hook to validate attribute((target("string"))). */
4153 ix86_valid_target_attribute_p (tree fndecl,
4154 tree ARG_UNUSED (name),
4156 int ARG_UNUSED (flags))
4158 struct cl_target_option cur_target;
4160 tree old_optimize = build_optimization_node ();
4161 tree new_target, new_optimize;
4162 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4164 /* If the function changed the optimization levels as well as setting target
4165 options, start with the optimizations specified. */
4166 if (func_optimize && func_optimize != old_optimize)
4167 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4169 /* The target attributes may also change some optimization flags, so update
4170 the optimization options if necessary. */
4171 cl_target_option_save (&cur_target);
4172 new_target = ix86_valid_target_attribute_tree (args);
4173 new_optimize = build_optimization_node ();
4180 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4182 if (old_optimize != new_optimize)
4183 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4186 cl_target_option_restore (&cur_target);
4188 if (old_optimize != new_optimize)
4189 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4195 /* Hook to determine if one function can safely inline another. */
4198 ix86_can_inline_p (tree caller, tree callee)
4201 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4202 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4204 /* If callee has no option attributes, then it is ok to inline. */
4208 /* If caller has no option attributes, but callee does then it is not ok to
4210 else if (!caller_tree)
4215 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4216 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4218 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4219 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4221 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4222 != callee_opts->ix86_isa_flags)
4225 /* See if we have the same non-isa options. */
4226 else if (caller_opts->target_flags != callee_opts->target_flags)
4229 /* See if arch, tune, etc. are the same. */
4230 else if (caller_opts->arch != callee_opts->arch)
4233 else if (caller_opts->tune != callee_opts->tune)
4236 else if (caller_opts->fpmath != callee_opts->fpmath)
4239 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4250 /* Remember the last target of ix86_set_current_function. */
4251 static GTY(()) tree ix86_previous_fndecl;
4253 /* Establish appropriate back-end context for processing the function
4254 FNDECL. The argument might be NULL to indicate processing at top
4255 level, outside of any function scope. */
4257 ix86_set_current_function (tree fndecl)
4259 /* Only change the context if the function changes. This hook is called
4260 several times in the course of compiling a function, and we don't want to
4261 slow things down too much or call target_reinit when it isn't safe. */
4262 if (fndecl && fndecl != ix86_previous_fndecl)
4264 tree old_tree = (ix86_previous_fndecl
4265 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4268 tree new_tree = (fndecl
4269 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4272 ix86_previous_fndecl = fndecl;
4273 if (old_tree == new_tree)
4278 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4284 struct cl_target_option *def
4285 = TREE_TARGET_OPTION (target_option_current_node);
4287 cl_target_option_restore (def);
4294 /* Return true if this goes in large data/bss. */
4297 ix86_in_large_data_p (tree exp)
4299 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4302 /* Functions are never large data. */
4303 if (TREE_CODE (exp) == FUNCTION_DECL)
4306 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4308 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4309 if (strcmp (section, ".ldata") == 0
4310 || strcmp (section, ".lbss") == 0)
4316 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4318 /* If this is an incomplete type with size 0, then we can't put it
4319 in data because it might be too big when completed. */
4320 if (!size || size > ix86_section_threshold)
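/* The threshold checked here is set by -mlarge-data-threshold= (validated
   earlier in override_options); objects above it, or of unknown size, are
   treated as large data under the medium code models.  */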
4327 /* Switch to the appropriate section for output of DECL.
4328 DECL is either a `VAR_DECL' node or a constant of some sort.
4329 RELOC indicates whether forming the initial value of DECL requires
4330 link-time relocations. */
4332 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4336 x86_64_elf_select_section (tree decl, int reloc,
4337 unsigned HOST_WIDE_INT align)
4339 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4340 && ix86_in_large_data_p (decl))
4342 const char *sname = NULL;
4343 unsigned int flags = SECTION_WRITE;
4344 switch (categorize_decl_for_section (decl, reloc))
4349 case SECCAT_DATA_REL:
4350 sname = ".ldata.rel";
4352 case SECCAT_DATA_REL_LOCAL:
4353 sname = ".ldata.rel.local";
4355 case SECCAT_DATA_REL_RO:
4356 sname = ".ldata.rel.ro";
4358 case SECCAT_DATA_REL_RO_LOCAL:
4359 sname = ".ldata.rel.ro.local";
4363 flags |= SECTION_BSS;
4366 case SECCAT_RODATA_MERGE_STR:
4367 case SECCAT_RODATA_MERGE_STR_INIT:
4368 case SECCAT_RODATA_MERGE_CONST:
4372 case SECCAT_SRODATA:
4379 /* We don't split these for the medium model. Place them into
4380 default sections and hope for the best. */
4385 /* We might get called with string constants, but get_named_section
4386 doesn't like them as they are not DECLs. Also, we need to set
4387 flags in that case. */
4389 return get_section (sname, flags, NULL);
4390 return get_named_section (decl, sname, reloc);
4393 return default_elf_select_section (decl, reloc, align);
4396 /* Build up a unique section name, expressed as a
4397 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4398 RELOC indicates whether the initial value of DECL requires
4399 link-time relocations. */
4401 static void ATTRIBUTE_UNUSED
4402 x86_64_elf_unique_section (tree decl, int reloc)
4404 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4405 && ix86_in_large_data_p (decl))
4407 const char *prefix = NULL;
4408 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4409 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4411 switch (categorize_decl_for_section (decl, reloc))
4414 case SECCAT_DATA_REL:
4415 case SECCAT_DATA_REL_LOCAL:
4416 case SECCAT_DATA_REL_RO:
4417 case SECCAT_DATA_REL_RO_LOCAL:
4418 prefix = one_only ? ".ld" : ".ldata";
4421 prefix = one_only ? ".lb" : ".lbss";
4424 case SECCAT_RODATA_MERGE_STR:
4425 case SECCAT_RODATA_MERGE_STR_INIT:
4426 case SECCAT_RODATA_MERGE_CONST:
4427 prefix = one_only ? ".lr" : ".lrodata";
4429 case SECCAT_SRODATA:
4436 /* We don't split these for the medium model. Place them into
4437 default sections and hope for the best. */
4442 const char *name, *linkonce;
4445 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4446 name = targetm.strip_name_encoding (name);
4448 /* If we're using one_only, then there needs to be a .gnu.linkonce
4449 prefix to the section name. */
4450 linkonce = one_only ? ".gnu.linkonce" : "";
4452 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4454 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4458 default_unique_section (decl, reloc);
4461 #ifdef COMMON_ASM_OP
4462 /* This says how to output assembler code to declare an
4463 uninitialized external linkage data object.
4465 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4468 x86_elf_aligned_common (FILE *file,
4469 const char *name, unsigned HOST_WIDE_INT size,
4472 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4473 && size > (unsigned int)ix86_section_threshold)
4474 fputs (".largecomm\t", file);
4476 fputs (COMMON_ASM_OP, file);
4477 assemble_name (file, name);
4478 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4479 size, align / BITS_PER_UNIT);
4483 /* Utility function for targets to use in implementing
4484 ASM_OUTPUT_ALIGNED_BSS. */
4487 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4488 const char *name, unsigned HOST_WIDE_INT size,
4491 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4492 && size > (unsigned int)ix86_section_threshold)
4493 switch_to_section (get_named_section (decl, ".lbss", 0));
4495 switch_to_section (bss_section);
4496 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4497 #ifdef ASM_DECLARE_OBJECT_NAME
4498 last_assemble_variable_decl = decl;
4499 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4501 /* The standard thing is just to output a label for the object. */
4502 ASM_OUTPUT_LABEL (file, name);
4503 #endif /* ASM_DECLARE_OBJECT_NAME */
4504 ASM_OUTPUT_SKIP (file, size ? size : 1);
4508 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4510 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4511 make the problem with not enough registers even worse. */
4512 #ifdef INSN_SCHEDULING
4514 flag_schedule_insns = 0;
4518 /* The Darwin libraries never set errno, so we might as well
4519 avoid calling them when that's the only reason we would. */
4520 flag_errno_math = 0;
4522 /* The default values of these switches depend on TARGET_64BIT,
4523 which is not known at this moment. Mark these values with 2 and
4524 let the user override them. In case there is no command line option
4525 specifying them, we will set the defaults in override_options. */
4527 flag_omit_frame_pointer = 2;
4529 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4533 flag_pcc_struct_return = 2;
4534 flag_asynchronous_unwind_tables = 2;
4535 flag_vect_cost_model = 1;
4536 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4537 SUBTARGET_OPTIMIZATION_OPTIONS;
4541 /* Decide whether we must probe the stack before any space allocation
4542 on this target. It's essentially TARGET_STACK_PROBE except when
4543 -fstack-check causes the stack to be already probed differently. */
4546 ix86_target_stack_probe (void)
4548 /* Do not probe the stack twice if static stack checking is enabled. */
4549 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4552 return TARGET_STACK_PROBE;
4555 /* Decide whether we can make a sibling call to a function. DECL is the
4556 declaration of the function being targeted by the call and EXP is the
4557 CALL_EXPR representing the call. */
4560 ix86_function_ok_for_sibcall (tree decl, tree exp)
4562 tree type, decl_or_type;
4565 /* If we are generating position-independent code, we cannot sibcall
4566 optimize any indirect call, or a direct call to a global function,
4567 as the PLT requires %ebx be live. */
4568 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4571 /* If we need to align the outgoing stack, then sibcalling would
4572 unalign the stack, which may break the called function. */
4573 if (ix86_minimum_incoming_stack_boundary (true)
4574 < PREFERRED_STACK_BOUNDARY)
4579 decl_or_type = decl;
4580 type = TREE_TYPE (decl);
4584 /* We're looking at the CALL_EXPR, we need the type of the function. */
4585 type = CALL_EXPR_FN (exp); /* pointer expression */
4586 type = TREE_TYPE (type); /* pointer type */
4587 type = TREE_TYPE (type); /* function type */
4588 decl_or_type = type;
4591 /* Check that the return value locations are the same. Like
4592 if we are returning floats on the 80387 register stack, we cannot
4593 make a sibcall from a function that doesn't return a float to a
4594 function that does or, conversely, from a function that does return
4595 a float to a function that doesn't; the necessary stack adjustment
4596 would not be executed. This is also the place we notice
4597 differences in the return value ABI. Note that it is ok for one
4598 of the functions to have void return type as long as the return
4599 value of the other is passed in a register. */
4600 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4601 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4603 if (STACK_REG_P (a) || STACK_REG_P (b))
4605 if (!rtx_equal_p (a, b))
4608 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4610 else if (!rtx_equal_p (a, b))
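/* Illustrative example (not in the original sources): a caller returning
   int must not sibcall a callee returning float on the 80387, since the
   callee leaves its result in %st(0) and the caller would never pop it.  */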
4615 /* The SYSV ABI has more call-clobbered registers;
4616 disallow sibcalls from MS to SYSV. */
4617 if (cfun->machine->call_abi == MS_ABI
4618 && ix86_function_type_abi (type) == SYSV_ABI)
4623 /* If this call is indirect, we'll need to be able to use a
4624 call-clobbered register for the address of the target function.
4625 Make sure that all such registers are not used for passing
4626 parameters. Note that DLLIMPORT functions are indirect. */
4628 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4630 if (ix86_function_regparm (type, NULL) >= 3)
4632 /* ??? Need to count the actual number of registers to be used,
4633 not the possible number of registers. Fix later. */
4639 /* Otherwise okay. That also includes certain types of indirect calls. */
4643 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4644 and "sseregparm" calling convention attributes;
4645 arguments as in struct attribute_spec.handler. */
4648 ix86_handle_cconv_attribute (tree *node, tree name,
4650 int flags ATTRIBUTE_UNUSED,
4653 if (TREE_CODE (*node) != FUNCTION_TYPE
4654 && TREE_CODE (*node) != METHOD_TYPE
4655 && TREE_CODE (*node) != FIELD_DECL
4656 && TREE_CODE (*node) != TYPE_DECL)
4658 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4660 *no_add_attrs = true;
4664 /* Can combine regparm with all attributes but fastcall. */
4665 if (is_attribute_p ("regparm", name))
4669 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4671 error ("fastcall and regparm attributes are not compatible");
4674 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4676 error ("regparm and thiscall attributes are not compatible");
4679 cst = TREE_VALUE (args);
4680 if (TREE_CODE (cst) != INTEGER_CST)
4682 warning (OPT_Wattributes,
4683 "%qE attribute requires an integer constant argument",
4685 *no_add_attrs = true;
4687 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4689 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4691 *no_add_attrs = true;
4699 /* Do not warn when emulating the MS ABI. */
4700 if ((TREE_CODE (*node) != FUNCTION_TYPE
4701 && TREE_CODE (*node) != METHOD_TYPE)
4702 || ix86_function_type_abi (*node) != MS_ABI)
4703 warning (OPT_Wattributes, "%qE attribute ignored",
4705 *no_add_attrs = true;
4709 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4710 if (is_attribute_p ("fastcall", name))
4712 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4714 error ("fastcall and cdecl attributes are not compatible");
4716 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4718 error ("fastcall and stdcall attributes are not compatible");
4720 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4722 error ("fastcall and regparm attributes are not compatible");
4724 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4726 error ("fastcall and thiscall attributes are not compatible");
4730 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4732 else if (is_attribute_p ("stdcall", name))
4734 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4736 error ("stdcall and cdecl attributes are not compatible");
4738 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4740 error ("stdcall and fastcall attributes are not compatible");
4742 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4744 error ("stdcall and thiscall attributes are not compatible");
4748 /* Can combine cdecl with regparm and sseregparm. */
4749 else if (is_attribute_p ("cdecl", name))
4751 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4753 error ("stdcall and cdecl attributes are not compatible");
4755 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4757 error ("fastcall and cdecl attributes are not compatible");
4759 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4761 error ("cdecl and thiscall attributes are not compatible");
4764 else if (is_attribute_p ("thiscall", name))
4766 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4767 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4769 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4771 error ("stdcall and thiscall attributes are not compatible");
4773 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4775 error ("fastcall and thiscall attributes are not compatible");
4777 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4779 error ("cdecl and thiscall attributes are not compatible");
4783 /* Can combine sseregparm with all attributes. */
4788 /* Return 0 if the attributes for two types are incompatible, 1 if they
4789 are compatible, and 2 if they are nearly compatible (which causes a
4790 warning to be generated). */
4793 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4795 /* Check for mismatch of non-default calling convention. */
4796 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4798 if (TREE_CODE (type1) != FUNCTION_TYPE
4799 && TREE_CODE (type1) != METHOD_TYPE)
4802 /* Check for mismatched fastcall/regparm types. */
4803 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4804 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4805 || (ix86_function_regparm (type1, NULL)
4806 != ix86_function_regparm (type2, NULL)))
4809 /* Check for mismatched sseregparm types. */
4810 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4811 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4814 /* Check for mismatched thiscall types. */
4815 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4816 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4819 /* Check for mismatched return types (cdecl vs stdcall). */
4820 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4821 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4827 /* Return the regparm value for a function with the indicated TYPE and DECL.
4828 DECL may be NULL when calling function indirectly
4829 or considering a libcall. */
4832 ix86_function_regparm (const_tree type, const_tree decl)
4838 return (ix86_function_type_abi (type) == SYSV_ABI
4839 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4841 regparm = ix86_regparm;
4842 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4845 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4849 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4852 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4855 /* Use register calling convention for local functions when possible. */
4857 && TREE_CODE (decl) == FUNCTION_DECL
4859 && !(profile_flag && !flag_fentry))
4861 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4862 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4865 int local_regparm, globals = 0, regno;
4867 /* Make sure no regparm register is taken by a
4868 fixed register variable. */
4869 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4870 if (fixed_regs[local_regparm])
4873 /* We don't want to use regparm(3) for nested functions as
4874 these use a static chain pointer in the third argument. */
4875 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4878 /* Each fixed register usage increases register pressure,
4879 so fewer registers should be used for argument passing.
4880 This functionality can be overridden by an explicit regparm value. */
4882 for (regno = 0; regno <= DI_REG; regno++)
4883 if (fixed_regs[regno])
4887 = globals < local_regparm ? local_regparm - globals : 0;
4889 if (local_regparm > regparm)
4890 regparm = local_regparm;
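/* Usage example (hypothetical declaration): with
     int add3 (int a, int b, int c) __attribute__((regparm(3)));
   a, b and c arrive in %eax, %edx and %ecx instead of on the stack.  */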
4897 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4898 DFmode (2) arguments in SSE registers for a function with the
4899 indicated TYPE and DECL. DECL may be NULL when calling function
4900 indirectly or considering a libcall. Otherwise return 0. */
4903 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4905 gcc_assert (!TARGET_64BIT);
4907 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4908 by the sseregparm attribute. */
4909 if (TARGET_SSEREGPARM
4910 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4917 error ("calling %qD with attribute sseregparm without "
4918 "SSE/SSE2 enabled", decl);
4920 error ("calling %qT with attribute sseregparm without "
4921 "SSE/SSE2 enabled", type);
4929 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4930 (and DFmode for SSE2) arguments in SSE registers. */
4931 if (decl && TARGET_SSE_MATH && optimize
4932 && !(profile_flag && !flag_fentry))
4934 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4935 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4937 return TARGET_SSE2 ? 2 : 1;
4943 /* Return true if EAX is live at the start of the function. Used by
4944 ix86_expand_prologue to determine if we need special help before
4945 calling allocate_stack_worker. */
4948 ix86_eax_live_at_start_p (void)
4950 /* Cheat. Don't bother working forward from ix86_function_regparm
4951 to the function type to whether an actual argument is located in
4952 eax. Instead just look at cfg info, which is still close enough
4953 to correct at this point. This gives false positives for broken
4954 functions that might use uninitialized data that happens to be
4955 allocated in eax, but who cares? */
4956 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4959 /* Value is the number of bytes of arguments automatically
4960 popped when returning from a subroutine call.
4961 FUNDECL is the declaration node of the function (as a tree),
4962 FUNTYPE is the data type of the function (as a tree),
4963 or for a library call it is an identifier node for the subroutine name.
4964 SIZE is the number of bytes of arguments passed on the stack.
4966 On the 80386, the RTD insn may be used to pop them if the number
4967 of args is fixed, but if the number is variable then the caller
4968 must pop them all. RTD can't be used for library calls now
4969 because the library is compiled with the Unix compiler.
4970 Use of RTD is a selectable option, since it is incompatible with
4971 standard Unix calling sequences. If the option is not selected,
4972 the caller must always pop the args.
4974 The attribute stdcall is equivalent to RTD on a per module basis. */
4977 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4981 /* None of the 64-bit ABIs pop arguments. */
4985 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4987 /* Cdecl functions override -mrtd, and never pop the stack. */
4988 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4990 /* Stdcall and fastcall functions will pop the stack if not variable args. */
4992 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4993 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4994 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
4997 if (rtd && ! stdarg_p (funtype))
5001 /* Lose any fake structure return argument if it is passed on the stack. */
5002 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5003 && !KEEP_AGGREGATE_RETURN_POINTER)
5005 int nregs = ix86_function_regparm (funtype, fundecl);
5007 return GET_MODE_SIZE (Pmode);
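/* Illustrative example: for
     void f (int, int) __attribute__((stdcall));
   this function returns 8, so f pops its arguments with "ret $8"; a
   vararg or cdecl function returns 0 and leaves the pop to the caller.  */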
5013 /* Argument support functions. */
5015 /* Return true when register may be used to pass function parameters. */
5017 ix86_function_arg_regno_p (int regno)
5020 const int *parm_regs;
5025 return (regno < REGPARM_MAX
5026 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5028 return (regno < REGPARM_MAX
5029 || (TARGET_MMX && MMX_REGNO_P (regno)
5030 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5031 || (TARGET_SSE && SSE_REGNO_P (regno)
5032 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5037 if (SSE_REGNO_P (regno) && TARGET_SSE)
5042 if (TARGET_SSE && SSE_REGNO_P (regno)
5043 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5047 /* TODO: The function should depend on current function ABI but
5048 builtins.c would need updating then. Therefore we use the default ABI. */
5051 /* RAX is used as a hidden argument to va_arg functions. */
5052 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5055 if (ix86_abi == MS_ABI)
5056 parm_regs = x86_64_ms_abi_int_parameter_registers;
5058 parm_regs = x86_64_int_parameter_registers;
5059 for (i = 0; i < (ix86_abi == MS_ABI
5060 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5061 if (regno == parm_regs[i])
5066 /* Return true if we do not know how to pass TYPE solely in registers. */
5069 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5071 if (must_pass_in_stack_var_size_or_pad (mode, type))
5074 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5075 The layout_type routine is crafty and tries to trick us into passing
5076 currently unsupported vector types on the stack by using TImode. */
5077 return (!TARGET_64BIT && mode == TImode
5078 && type && TREE_CODE (type) != VECTOR_TYPE);
5081 /* Return the size, in bytes, of the area reserved for arguments passed
5082 in registers for the function represented by FNDECL, depending on the ABI used. */
5085 ix86_reg_parm_stack_space (const_tree fndecl)
5087 enum calling_abi call_abi = SYSV_ABI;
5088 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5089 call_abi = ix86_function_abi (fndecl);
5091 call_abi = ix86_function_type_abi (fndecl);
5092 if (call_abi == MS_ABI)
5097 /* Returns value SYSV_ABI or MS_ABI dependent on fntype, specifying the call ABI used. */
5100 ix86_function_type_abi (const_tree fntype)
5102 if (TARGET_64BIT && fntype != NULL)
5104 enum calling_abi abi = ix86_abi;
5105 if (abi == SYSV_ABI)
5107 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5110 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5118 ix86_function_ms_hook_prologue (const_tree fntype)
5120 if (fntype && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
5122 if (decl_function_context (fntype) != NULL_TREE)
5124 error_at (DECL_SOURCE_LOCATION (fntype),
5125 "ms_hook_prologue is not compatible with nested function");
5133 static enum calling_abi
5134 ix86_function_abi (const_tree fndecl)
5138 return ix86_function_type_abi (TREE_TYPE (fndecl));
5141 /* Returns value SYSV_ABI or MS_ABI dependent on cfun, specifying the call ABI used. */
5144 ix86_cfun_abi (void)
5146 if (! cfun || ! TARGET_64BIT)
5148 return cfun->machine->call_abi;
5151 /* Write the extra assembler code needed to declare a function properly. */
5154 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5157 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5161 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5162 unsigned int filler_cc = 0xcccccccc;
5164 for (i = 0; i < filler_count; i += 4)
5165 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5168 ASM_OUTPUT_LABEL (asm_out_file, fname);
5170 /* Output magic byte marker, if hot-patch attribute is set.
5171 For the x86 case the frame-pointer prologue will be emitted in expand_prologue. */
5176 /* leaq [%rsp + 0], %rsp */
5177 asm_fprintf (asm_out_file, ASM_BYTE
5178 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5180 /* movl.s %edi, %edi. */
5181 asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
5186 extern void init_regs (void);
5188 /* Implementation of the call ABI switching target hook. Sets the
5189 call register sets specific to FNDECL. See also CONDITIONAL_REGISTER_USAGE
5190 for more details. */
5192 ix86_call_abi_override (const_tree fndecl)
5194 if (fndecl == NULL_TREE)
5195 cfun->machine->call_abi = ix86_abi;
5197 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5200 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5201 re-initialization of init_regs each time we switch function context, since
5202 this is needed only during RTL expansion. */
5204 ix86_maybe_switch_abi (void)
5207 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5211 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5212 for a call to a function whose data type is FNTYPE.
5213 For a library call, FNTYPE is 0. */
5216 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5217 tree fntype, /* tree ptr for function decl */
5218 rtx libname, /* SYMBOL_REF of library name or 0 */
5221 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5222 memset (cum, 0, sizeof (*cum));
5225 cum->call_abi = ix86_function_abi (fndecl);
5227 cum->call_abi = ix86_function_type_abi (fntype);
5228 /* Set up the number of registers to use for passing arguments. */
5230 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5231 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5232 "or subtarget optimization implying it");
5233 cum->nregs = ix86_regparm;
5236 cum->nregs = (cum->call_abi == SYSV_ABI
5237 ? X86_64_REGPARM_MAX
5238 : X86_64_MS_REGPARM_MAX);
5242 cum->sse_nregs = SSE_REGPARM_MAX;
5245 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5246 ? X86_64_SSE_REGPARM_MAX
5247 : X86_64_MS_SSE_REGPARM_MAX);
5251 cum->mmx_nregs = MMX_REGPARM_MAX;
5252 cum->warn_avx = true;
5253 cum->warn_sse = true;
5254 cum->warn_mmx = true;
5256 /* Because the type might mismatch between caller and callee, we need to
5257 use the actual type of the function for local calls.
5258 FIXME: cgraph_analyze can be told to actually record if function uses
5259 va_start so for local functions maybe_vaarg can be made aggressive instead.
5261 FIXME: once the type system is fixed, we won't need this code anymore. */
5263 fntype = TREE_TYPE (fndecl);
5264 cum->maybe_vaarg = (fntype
5265 ? (!prototype_p (fntype) || stdarg_p (fntype))
5270 /* If there are variable arguments, then we won't pass anything
5271 in registers in 32-bit mode. */
5272 if (stdarg_p (fntype))
5283 /* Use ecx and edx registers if function has fastcall attribute,
5284 else look for regparm information. */
5287 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5290 cum->fastcall = 1; /* Same first register as in fastcall. */
5292 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5298 cum->nregs = ix86_function_regparm (fntype, fndecl);
5301 /* Set up the number of SSE registers used for passing SFmode
5302 and DFmode arguments. Warn for mismatching ABI. */
5303 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5307 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5308 But in the case of vector types, it is some vector mode.
5310 When we have only some of our vector isa extensions enabled, then there
5311 are some modes for which vector_mode_supported_p is false. For these
5312 modes, the generic vector support in gcc will choose some non-vector mode
5313 in order to implement the type. By computing the natural mode, we'll
5314 select the proper ABI location for the operand and not depend on whatever
5315 the middle-end decides to do with these vector types.
5317 The middle-end can't deal with vector types > 16 bytes. In this
5318 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5321 static enum machine_mode
5322 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5324 enum machine_mode mode = TYPE_MODE (type);
5326 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5328 HOST_WIDE_INT size = int_size_in_bytes (type);
5329 if ((size == 8 || size == 16 || size == 32)
5330 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5331 && TYPE_VECTOR_SUBPARTS (type) > 1)
5333 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5335 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5336 mode = MIN_MODE_VECTOR_FLOAT;
5338 mode = MIN_MODE_VECTOR_INT;
5340 /* Get the mode which has this inner mode and number of units. */
5341 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5342 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5343 && GET_MODE_INNER (mode) == innermode)
5345 if (size == 32 && !TARGET_AVX)
5347 static bool warnedavx;
5354 warning (0, "AVX vector argument without AVX "
5355 "enabled changes the ABI");
5357 return TYPE_MODE (type);
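/* Example of the warning above (hypothetical code): compiling
     typedef float v8sf __attribute__((vector_size (32)));
     v8sf f (v8sf x) { return x; }
   without -mavx takes this path, since the 32-byte vector has no
   supported natural vector mode.  */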
5370 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5371 this may not agree with the mode that the type system has chosen for the
5372 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5373 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5376 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5381 if (orig_mode != BLKmode)
5382 tmp = gen_rtx_REG (orig_mode, regno);
5385 tmp = gen_rtx_REG (mode, regno);
5386 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5387 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5393 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5394 of this code is to classify each eightbyte of the incoming argument by register
5395 class and assign registers accordingly. */
5397 /* Return the union class of CLASS1 and CLASS2.
5398 See the x86-64 PS ABI for details. */
5400 static enum x86_64_reg_class
5401 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5403 /* Rule #1: If both classes are equal, this is the resulting class. */
5404 if (class1 == class2)
5407 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5409 if (class1 == X86_64_NO_CLASS)
5411 if (class2 == X86_64_NO_CLASS)
5414 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5415 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5416 return X86_64_MEMORY_CLASS;
5418 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5419 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5420 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5421 return X86_64_INTEGERSI_CLASS;
5422 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5423 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5424 return X86_64_INTEGER_CLASS;
5426 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5428 if (class1 == X86_64_X87_CLASS
5429 || class1 == X86_64_X87UP_CLASS
5430 || class1 == X86_64_COMPLEX_X87_CLASS
5431 || class2 == X86_64_X87_CLASS
5432 || class2 == X86_64_X87UP_CLASS
5433 || class2 == X86_64_COMPLEX_X87_CLASS)
5434 return X86_64_MEMORY_CLASS;
5436 /* Rule #6: Otherwise class SSE is used. */
5437 return X86_64_SSE_CLASS;
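/* Worked example (assumes SysV x86-64 classification): for
     struct { int i; float f; }
   the single eightbyte merges INTEGERSI (from i) with SSESF (from f),
   and rule #4 above yields INTEGERSI, so the struct travels in a
   general-purpose register.  */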
5440 /* Classify the argument of type TYPE and mode MODE.
5441 CLASSES will be filled by the register class used to pass each word
5442 of the operand. The number of words is returned. In case the parameter
5443 should be passed in memory, 0 is returned. As a special case for zero
5444 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5446 BIT_OFFSET is used internally for handling records and specifies the
5447 offset in bits modulo 256, to avoid overflow cases.
5449 See the x86-64 PS ABI for details.
5453 classify_argument (enum machine_mode mode, const_tree type,
5454 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5456 HOST_WIDE_INT bytes =
5457 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5458 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5460 /* Variable sized entities are always passed/returned in memory. */
5464 if (mode != VOIDmode
5465 && targetm.calls.must_pass_in_stack (mode, type))
5468 if (type && AGGREGATE_TYPE_P (type))
5472 enum x86_64_reg_class subclasses[MAX_CLASSES];
5474 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5478 for (i = 0; i < words; i++)
5479 classes[i] = X86_64_NO_CLASS;
5481 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5482 signal memory class, so handle it as a special case. */
5485 classes[0] = X86_64_NO_CLASS;
5489 /* Classify each field of record and merge classes. */
5490 switch (TREE_CODE (type))
5493 /* And now merge the fields of structure. */
5494 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5496 if (TREE_CODE (field) == FIELD_DECL)
5500 if (TREE_TYPE (field) == error_mark_node)
5503 /* Bitfields are always classified as integer. Handle them
5504 early, since later code would consider them to be
5505 misaligned integers. */
5506 if (DECL_BIT_FIELD (field))
5508 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5509 i < ((int_bit_position (field) + (bit_offset % 64))
5510 + tree_low_cst (DECL_SIZE (field), 0)
5513 merge_classes (X86_64_INTEGER_CLASS,
5520 type = TREE_TYPE (field);
5522 /* Flexible array member is ignored. */
5523 if (TYPE_MODE (type) == BLKmode
5524 && TREE_CODE (type) == ARRAY_TYPE
5525 && TYPE_SIZE (type) == NULL_TREE
5526 && TYPE_DOMAIN (type) != NULL_TREE
5527 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5532 if (!warned && warn_psabi)
5535 inform (input_location,
5536 "The ABI of passing struct with"
5537 " a flexible array member has"
5538 " changed in GCC 4.4");
5542 num = classify_argument (TYPE_MODE (type), type,
5544 (int_bit_position (field)
5545 + bit_offset) % 256);
5548 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5549 for (i = 0; i < num && (i + pos) < words; i++)
5551 merge_classes (subclasses[i], classes[i + pos]);
5558 /* Arrays are handled as small records. */
5561 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5562 TREE_TYPE (type), subclasses, bit_offset);
5566 /* The partial classes are now full classes. */
5567 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5568 subclasses[0] = X86_64_SSE_CLASS;
5569 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5570 && !((bit_offset % 64) == 0 && bytes == 4))
5571 subclasses[0] = X86_64_INTEGER_CLASS;
5573 for (i = 0; i < words; i++)
5574 classes[i] = subclasses[i % num];
5579 case QUAL_UNION_TYPE:
5580 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5582 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5584 if (TREE_CODE (field) == FIELD_DECL)
5588 if (TREE_TYPE (field) == error_mark_node)
5591 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5592 TREE_TYPE (field), subclasses,
5596 for (i = 0; i < num; i++)
5597 classes[i] = merge_classes (subclasses[i], classes[i]);
5608 /* When size > 16 bytes, if the first one isn't
5609 X86_64_SSE_CLASS or any other ones aren't
5610 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5612 if (classes[0] != X86_64_SSE_CLASS)
5615 for (i = 1; i < words; i++)
5616 if (classes[i] != X86_64_SSEUP_CLASS)
5620 /* Final merger cleanup. */
5621 for (i = 0; i < words; i++)
5623 /* If one class is MEMORY, everything should be passed in memory. */
5625 if (classes[i] == X86_64_MEMORY_CLASS)
5628 /* X86_64_SSEUP_CLASS should always be preceded by
5629 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5630 if (classes[i] == X86_64_SSEUP_CLASS
5631 && classes[i - 1] != X86_64_SSE_CLASS
5632 && classes[i - 1] != X86_64_SSEUP_CLASS)
5634 /* The first one should never be X86_64_SSEUP_CLASS. */
5635 gcc_assert (i != 0);
5636 classes[i] = X86_64_SSE_CLASS;
5639 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5640 everything should be passed in memory. */
5641 if (classes[i] == X86_64_X87UP_CLASS
5642 && (classes[i - 1] != X86_64_X87_CLASS))
5646 /* The first one should never be X86_64_X87UP_CLASS. */
5647 gcc_assert (i != 0);
5648 if (!warned && warn_psabi)
5651 inform (input_location,
5652 "The ABI of passing union with long double"
5653 " has changed in GCC 4.4");
5661 /* Compute the alignment needed.  We align all types to their natural
5662 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5663 if (mode != VOIDmode && mode != BLKmode)
5665 int mode_alignment = GET_MODE_BITSIZE (mode);
5668 mode_alignment = 128;
5669 else if (mode == XCmode)
5670 mode_alignment = 256;
5671 if (COMPLEX_MODE_P (mode))
5672 mode_alignment /= 2;
5673 /* Misaligned fields are always returned in memory. */
5674 if (bit_offset % mode_alignment)
5678 /* For V1xx modes, just use the base mode. */
5679 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5680 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5681 mode = GET_MODE_INNER (mode);
5683 /* Classification of atomic types. */
5688 classes[0] = X86_64_SSE_CLASS;
5691 classes[0] = X86_64_SSE_CLASS;
5692 classes[1] = X86_64_SSEUP_CLASS;
5702 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5706 classes[0] = X86_64_INTEGERSI_CLASS;
5709 else if (size <= 64)
5711 classes[0] = X86_64_INTEGER_CLASS;
5714 else if (size <= 64+32)
5716 classes[0] = X86_64_INTEGER_CLASS;
5717 classes[1] = X86_64_INTEGERSI_CLASS;
5720 else if (size <= 64+64)
5722 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5730 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5734 /* OImode shouldn't be used directly. */
5739 if (!(bit_offset % 64))
5740 classes[0] = X86_64_SSESF_CLASS;
5742 classes[0] = X86_64_SSE_CLASS;
5745 classes[0] = X86_64_SSEDF_CLASS;
5748 classes[0] = X86_64_X87_CLASS;
5749 classes[1] = X86_64_X87UP_CLASS;
5752 classes[0] = X86_64_SSE_CLASS;
5753 classes[1] = X86_64_SSEUP_CLASS;
5756 classes[0] = X86_64_SSE_CLASS;
5757 if (!(bit_offset % 64))
5763 if (!warned && warn_psabi)
5766 inform (input_location,
5767 "The ABI of passing structure with complex float"
5768 " member has changed in GCC 4.4");
5770 classes[1] = X86_64_SSESF_CLASS;
5774 classes[0] = X86_64_SSEDF_CLASS;
5775 classes[1] = X86_64_SSEDF_CLASS;
5778 classes[0] = X86_64_COMPLEX_X87_CLASS;
5781 /* These modes are larger than 16 bytes. */
5789 classes[0] = X86_64_SSE_CLASS;
5790 classes[1] = X86_64_SSEUP_CLASS;
5791 classes[2] = X86_64_SSEUP_CLASS;
5792 classes[3] = X86_64_SSEUP_CLASS;
5800 classes[0] = X86_64_SSE_CLASS;
5801 classes[1] = X86_64_SSEUP_CLASS;
5809 classes[0] = X86_64_SSE_CLASS;
5815 gcc_assert (VECTOR_MODE_P (mode));
5820 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5822 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5823 classes[0] = X86_64_INTEGERSI_CLASS;
5825 classes[0] = X86_64_INTEGER_CLASS;
5826 classes[1] = X86_64_INTEGER_CLASS;
5827 return 1 + (bytes > 8);
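/* Editorial example (illustrative, not compiled here): classifying a
   24-byte aggregate such as

     struct big { long a, b, c; };

   yields three INTEGER eightbytes, but because the aggregate is larger
   than 16 bytes and has no SSE/SSEUP shape, classify_argument returns 0
   and the structure is passed in memory, per the size checks above.  */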
5831 /* Examine the argument and set the number of registers required in each
5832 class.  Return 0 iff the parameter should be passed in memory. */
5834 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5835 int *int_nregs, int *sse_nregs)
5837 enum x86_64_reg_class regclass[MAX_CLASSES];
5838 int n = classify_argument (mode, type, regclass, 0);
5844 for (n--; n >= 0; n--)
5845 switch (regclass[n])
5847 case X86_64_INTEGER_CLASS:
5848 case X86_64_INTEGERSI_CLASS:
5851 case X86_64_SSE_CLASS:
5852 case X86_64_SSESF_CLASS:
5853 case X86_64_SSEDF_CLASS:
5856 case X86_64_NO_CLASS:
5857 case X86_64_SSEUP_CLASS:
5859 case X86_64_X87_CLASS:
5860 case X86_64_X87UP_CLASS:
5864 case X86_64_COMPLEX_X87_CLASS:
5865 return in_return ? 2 : 0;
5866 case X86_64_MEMORY_CLASS:
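/* Editorial note (a sketch of expected behavior): for
   struct s { long l; double d; }, examine_argument reports
   *int_nregs == 1 and *sse_nregs == 1; for the 24-byte all-integer
   struct above it returns 0, meaning "pass in memory".  */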
5872 /* Construct container for the argument used by GCC interface. See
5873 FUNCTION_ARG for the detailed description. */
5876 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5877 const_tree type, int in_return, int nintregs, int nsseregs,
5878 const int *intreg, int sse_regno)
5880 /* The following variables hold the static issued_error state. */
5881 static bool issued_sse_arg_error;
5882 static bool issued_sse_ret_error;
5883 static bool issued_x87_ret_error;
5885 enum machine_mode tmpmode;
5887 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5888 enum x86_64_reg_class regclass[MAX_CLASSES];
5892 int needed_sseregs, needed_intregs;
5893 rtx exp[MAX_CLASSES];
5896 n = classify_argument (mode, type, regclass, 0);
5899 if (!examine_argument (mode, type, in_return, &needed_intregs,
5902 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5905 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5906 some less clueful developer tries to use floating-point anyway. */
5907 if (needed_sseregs && !TARGET_SSE)
5911 if (!issued_sse_ret_error)
5913 error ("SSE register return with SSE disabled");
5914 issued_sse_ret_error = true;
5917 else if (!issued_sse_arg_error)
5919 error ("SSE register argument with SSE disabled");
5920 issued_sse_arg_error = true;
5925 /* Likewise, error if the ABI requires us to return values in the
5926 x87 registers and the user specified -mno-80387. */
5927 if (!TARGET_80387 && in_return)
5928 for (i = 0; i < n; i++)
5929 if (regclass[i] == X86_64_X87_CLASS
5930 || regclass[i] == X86_64_X87UP_CLASS
5931 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5933 if (!issued_x87_ret_error)
5935 error ("x87 register return with x87 disabled");
5936 issued_x87_ret_error = true;
5941 /* First construct simple cases. Avoid SCmode, since we want to use
5942 a single register to pass this type. */
5943 if (n == 1 && mode != SCmode)
5944 switch (regclass[0])
5946 case X86_64_INTEGER_CLASS:
5947 case X86_64_INTEGERSI_CLASS:
5948 return gen_rtx_REG (mode, intreg[0]);
5949 case X86_64_SSE_CLASS:
5950 case X86_64_SSESF_CLASS:
5951 case X86_64_SSEDF_CLASS:
5952 if (mode != BLKmode)
5953 return gen_reg_or_parallel (mode, orig_mode,
5954 SSE_REGNO (sse_regno));
5956 case X86_64_X87_CLASS:
5957 case X86_64_COMPLEX_X87_CLASS:
5958 return gen_rtx_REG (mode, FIRST_STACK_REG);
5959 case X86_64_NO_CLASS:
5960 /* Zero sized array, struct or class. */
5965 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5966 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5967 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5969 && regclass[0] == X86_64_SSE_CLASS
5970 && regclass[1] == X86_64_SSEUP_CLASS
5971 && regclass[2] == X86_64_SSEUP_CLASS
5972 && regclass[3] == X86_64_SSEUP_CLASS
5974 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5977 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5978 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5979 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5980 && regclass[1] == X86_64_INTEGER_CLASS
5981 && (mode == CDImode || mode == TImode || mode == TFmode)
5982 && intreg[0] + 1 == intreg[1])
5983 return gen_rtx_REG (mode, intreg[0]);
5985 /* Otherwise figure out the entries of the PARALLEL. */
5986 for (i = 0; i < n; i++)
5990 switch (regclass[i])
5992 case X86_64_NO_CLASS:
5994 case X86_64_INTEGER_CLASS:
5995 case X86_64_INTEGERSI_CLASS:
5996 /* Merge TImodes on aligned occasions here too. */
5997 if (i * 8 + 8 > bytes)
5998 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5999 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6003 /* We've requested 24 bytes for which we don't have a mode.  Use DImode. */
6004 if (tmpmode == BLKmode)
6006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6007 gen_rtx_REG (tmpmode, *intreg),
6011 case X86_64_SSESF_CLASS:
6012 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6013 gen_rtx_REG (SFmode,
6014 SSE_REGNO (sse_regno)),
6018 case X86_64_SSEDF_CLASS:
6019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6020 gen_rtx_REG (DFmode,
6021 SSE_REGNO (sse_regno)),
6025 case X86_64_SSE_CLASS:
6033 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6043 && regclass[1] == X86_64_SSEUP_CLASS
6044 && regclass[2] == X86_64_SSEUP_CLASS
6045 && regclass[3] == X86_64_SSEUP_CLASS);
6052 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6053 gen_rtx_REG (tmpmode,
6054 SSE_REGNO (sse_regno)),
6063 /* Empty aligned struct, union or class. */
6067 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6068 for (i = 0; i < nexps; i++)
6069 XVECEXP (ret, 0, i) = exp [i];
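/* Editorial sketch (hypothetical RTL dump, for orientation only): for
   struct s { long l; double d; } the container is a PARALLEL pairing
   each eightbyte's register with its byte offset, roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])  */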
6073 /* Update the data in CUM to advance over an argument of mode MODE
6074 and data type TYPE. (TYPE is null for libcalls where that information
6075 may not be available.) */
6078 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6079 const_tree type, HOST_WIDE_INT bytes,
6080 HOST_WIDE_INT words)
6096 cum->words += words;
6097 cum->nregs -= words;
6098 cum->regno += words;
6100 if (cum->nregs <= 0)
6108 /* OImode shouldn't be used directly. */
6112 if (cum->float_in_sse < 2)
6115 if (cum->float_in_sse < 1)
6132 if (!type || !AGGREGATE_TYPE_P (type))
6134 cum->sse_words += words;
6135 cum->sse_nregs -= 1;
6136 cum->sse_regno += 1;
6137 if (cum->sse_nregs <= 0)
6151 if (!type || !AGGREGATE_TYPE_P (type))
6153 cum->mmx_words += words;
6154 cum->mmx_nregs -= 1;
6155 cum->mmx_regno += 1;
6156 if (cum->mmx_nregs <= 0)
6167 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6168 const_tree type, HOST_WIDE_INT words, bool named)
6170 int int_nregs, sse_nregs;
6172 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6173 if (!named && VALID_AVX256_REG_MODE (mode))
6176 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6177 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6179 cum->nregs -= int_nregs;
6180 cum->sse_nregs -= sse_nregs;
6181 cum->regno += int_nregs;
6182 cum->sse_regno += sse_nregs;
6186 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6187 cum->words = (cum->words + align - 1) & ~(align - 1);
6188 cum->words += words;
6193 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6194 HOST_WIDE_INT words)
6196 /* Otherwise, this should be passed indirectly. */
6197 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6199 cum->words += words;
6207 /* Update the data in CUM to advance over an argument of mode MODE and
6208 data type TYPE. (TYPE is null for libcalls where that information
6209 may not be available.) */
6212 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6213 const_tree type, bool named)
6215 HOST_WIDE_INT bytes, words;
6217 if (mode == BLKmode)
6218 bytes = int_size_in_bytes (type);
6220 bytes = GET_MODE_SIZE (mode);
6221 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6224 mode = type_natural_mode (type, NULL);
6226 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6227 function_arg_advance_ms_64 (cum, bytes, words);
6228 else if (TARGET_64BIT)
6229 function_arg_advance_64 (cum, mode, type, words, named);
6231 function_arg_advance_32 (cum, mode, type, bytes, words);
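/* Editorial example (a sketch, assuming the usual six integer and
   eight SSE argument registers of the SysV x86-64 ABI): for
   void f (int i, double d), advancing over "i" bumps cum->regno from
   0 to 1 and advancing over "d" bumps cum->sse_regno from 0 to 1,
   leaving five integer and seven SSE argument registers free.  */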
6234 /* Define where to put the arguments to a function.
6235 Value is zero to push the argument on the stack,
6236 or a hard register in which to store the argument.
6238 MODE is the argument's machine mode.
6239 TYPE is the data type of the argument (as a tree).
6240 This is null for libcalls where that information may
6242 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6243 the preceding args and about the function being called.
6244 NAMED is nonzero if this argument is a named parameter
6245 (otherwise it is an extra parameter matching an ellipsis). */
6248 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6249 enum machine_mode orig_mode, const_tree type,
6250 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6252 static bool warnedsse, warnedmmx;
6254 /* Avoid the AL settings for the Unix64 ABI. */
6255 if (mode == VOIDmode)
6271 if (words <= cum->nregs)
6273 int regno = cum->regno;
6275 /* Fastcall allocates the first two DWORD (SImode) or
6276 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6282 || (type && AGGREGATE_TYPE_P (type)))
6285 /* ECX, not EAX, is the first allocated register. */
6286 if (regno == AX_REG)
6289 return gen_rtx_REG (mode, regno);
6294 if (cum->float_in_sse < 2)
6297 if (cum->float_in_sse < 1)
6301 /* In 32bit, we pass TImode in xmm registers. */
6308 if (!type || !AGGREGATE_TYPE_P (type))
6310 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6313 warning (0, "SSE vector argument without SSE enabled "
6317 return gen_reg_or_parallel (mode, orig_mode,
6318 cum->sse_regno + FIRST_SSE_REG);
6323 /* OImode shouldn't be used directly. */
6332 if (!type || !AGGREGATE_TYPE_P (type))
6335 return gen_reg_or_parallel (mode, orig_mode,
6336 cum->sse_regno + FIRST_SSE_REG);
6346 if (!type || !AGGREGATE_TYPE_P (type))
6348 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6351 warning (0, "MMX vector argument without MMX enabled "
6355 return gen_reg_or_parallel (mode, orig_mode,
6356 cum->mmx_regno + FIRST_MMX_REG);
6365 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6366 enum machine_mode orig_mode, const_tree type, bool named)
6368 /* Handle a hidden AL argument containing the number of registers
6369 for varargs x86-64 functions. */
6370 if (mode == VOIDmode)
6371 return GEN_INT (cum->maybe_vaarg
6372 ? (cum->sse_nregs < 0
6373 ? X86_64_SSE_REGPARM_MAX
6388 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6394 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6396 &x86_64_int_parameter_registers [cum->regno],
6401 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6402 enum machine_mode orig_mode, bool named,
6403 HOST_WIDE_INT bytes)
6407 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
6408 We use the value -2 to specify that the current function call is MS_ABI. */
6409 if (mode == VOIDmode)
6410 return GEN_INT (-2);
6412 /* If we've run out of registers, it goes on the stack. */
6413 if (cum->nregs == 0)
6416 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6418 /* Only floating point modes are passed in anything but integer regs. */
6419 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6422 regno = cum->regno + FIRST_SSE_REG;
6427 /* Unnamed floating parameters are passed in both the
6428 SSE and integer registers. */
6429 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6430 t2 = gen_rtx_REG (mode, regno);
6431 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6432 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6433 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6436 /* Handle aggregate types passed in registers. */
6437 if (orig_mode == BLKmode)
6439 if (bytes > 0 && bytes <= 8)
6440 mode = (bytes > 4 ? DImode : SImode);
6441 if (mode == BLKmode)
6445 return gen_reg_or_parallel (mode, orig_mode, regno);
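/* Editorial note on the MS_ABI path above (a sketch, not
   authoritative): the Microsoft x64 convention assigns the first four
   arguments to RCX, RDX, R8 and R9 by position, with SFmode/DFmode
   arguments taking the matching XMM0-XMM3 slot instead; that is why
   the SSE register number is derived from cum->regno rather than from
   a separate SSE counter.  */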
6448 /* Return where to put the arguments to a function.
6449 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6451 MODE is the argument's machine mode. TYPE is the data type of the
6452 argument. It is null for libcalls where that information may not be
6453 available. CUM gives information about the preceding args and about
6454 the function being called. NAMED is nonzero if this argument is a
6455 named parameter (otherwise it is an extra parameter matching an
6459 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6460 const_tree type, bool named)
6462 enum machine_mode mode = omode;
6463 HOST_WIDE_INT bytes, words;
6465 if (mode == BLKmode)
6466 bytes = int_size_in_bytes (type);
6468 bytes = GET_MODE_SIZE (mode);
6469 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6471 /* To simplify the code below, represent vector types with a vector mode
6472 even if MMX/SSE are not active. */
6473 if (type && TREE_CODE (type) == VECTOR_TYPE)
6474 mode = type_natural_mode (type, cum);
6476 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6477 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6478 else if (TARGET_64BIT)
6479 return function_arg_64 (cum, mode, omode, type, named);
6481 return function_arg_32 (cum, mode, omode, type, bytes, words);
6484 /* A C expression that indicates when an argument must be passed by
6485 reference. If nonzero for an argument, a copy of that argument is
6486 made in memory and a pointer to the argument is passed instead of
6487 the argument itself. The pointer is passed in whatever way is
6488 appropriate for passing a pointer to that type. */
6491 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6492 enum machine_mode mode ATTRIBUTE_UNUSED,
6493 const_tree type, bool named ATTRIBUTE_UNUSED)
6495 /* See Windows x64 Software Convention. */
6496 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6498 int msize = (int) GET_MODE_SIZE (mode);
6501 /* Arrays are passed by reference. */
6502 if (TREE_CODE (type) == ARRAY_TYPE)
6505 if (AGGREGATE_TYPE_P (type))
6507 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6508 are passed by reference. */
6509 msize = int_size_in_bytes (type);
6513 /* __m128 is passed by reference. */
6515 case 1: case 2: case 4: case 8:
6521 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
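/* Editorial example (a sketch of the Windows x64 rules above):

     struct s8  { long l; };        8 bytes: passed by value
     struct s12 { int a, b, c; };   12 bytes: passed by reference

   Only aggregates of exactly 1, 2, 4 or 8 bytes travel by value;
   anything else is copied and a pointer is passed instead.  */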
6527 /* Return true when TYPE should be 128bit aligned for 32bit argument passing ABIs. */
6530 contains_aligned_value_p (const_tree type)
6532 enum machine_mode mode = TYPE_MODE (type);
6533 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6537 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6539 if (TYPE_ALIGN (type) < 128)
6542 if (AGGREGATE_TYPE_P (type))
6544 /* Walk the aggregates recursively. */
6545 switch (TREE_CODE (type))
6549 case QUAL_UNION_TYPE:
6553 /* Walk all the structure fields. */
6554 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6556 if (TREE_CODE (field) == FIELD_DECL
6557 && contains_aligned_value_p (TREE_TYPE (field)))
6564 /* Just for use if some language passes arrays by value. */
6565 if (contains_aligned_value_p (TREE_TYPE (type)))
6576 /* Gives the alignment boundary, in bits, of an argument with the
6577 specified mode and type. */
6580 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6585 /* Since calls use the main variant of the type, we convert TYPE to
6586 its main variant here. */
6587 type = TYPE_MAIN_VARIANT (type);
6588 align = TYPE_ALIGN (type);
6591 align = GET_MODE_ALIGNMENT (mode);
6592 if (align < PARM_BOUNDARY)
6593 align = PARM_BOUNDARY;
6594 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6595 natural boundaries. */
6596 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6598 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6599 make an exception for SSE modes since these require 128bit alignment.
6602 The handling here differs from field_alignment. ICC aligns MMX
6603 arguments to 4 byte boundaries, while structure fields are aligned
6604 to 8 byte boundaries. */
6607 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6608 align = PARM_BOUNDARY;
6612 if (!contains_aligned_value_p (type))
6613 align = PARM_BOUNDARY;
6616 if (align > BIGGEST_ALIGNMENT)
6617 align = BIGGEST_ALIGNMENT;
6621 /* Return true if N is a possible register number for a function value. */
6624 ix86_function_value_regno_p (const unsigned int regno)
6631 case FIRST_FLOAT_REG:
6632 /* TODO: The function should depend on current function ABI but
6633 builtins.c would need updating then.  Therefore we use the default ABI. */
6635 if (TARGET_64BIT && ix86_abi == MS_ABI)
6637 return TARGET_FLOAT_RETURNS_IN_80387;
6643 if (TARGET_MACHO || TARGET_64BIT)
6651 /* Define how to find the value returned by a function.
6652 VALTYPE is the data type of the value (as a tree).
6653 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6654 otherwise, FUNC is 0. */
6657 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6658 const_tree fntype, const_tree fn)
6662 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6663 we normally prevent this case when mmx is not available. However
6664 some ABIs may require the result to be returned like DImode. */
6665 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6666 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6668 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6669 we prevent this case when sse is not available. However some ABIs
6670 may require the result to be returned like integer TImode. */
6671 else if (mode == TImode
6672 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6673 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6675 /* 32-byte vector modes in %ymm0. */
6676 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6677 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6679 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6680 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6681 regno = FIRST_FLOAT_REG;
6683 /* Most things go in %eax. */
6686 /* Override FP return register with %xmm0 for local functions when
6687 SSE math is enabled or for functions with sseregparm attribute. */
6688 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6690 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6691 if ((sse_level >= 1 && mode == SFmode)
6692 || (sse_level == 2 && mode == DFmode))
6693 regno = FIRST_SSE_REG;
6696 /* OImode shouldn't be used directly. */
6697 gcc_assert (mode != OImode);
6699 return gen_rtx_REG (orig_mode, regno);
6703 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6708 /* Handle libcalls, which don't provide a type node. */
6709 if (valtype == NULL)
6721 return gen_rtx_REG (mode, FIRST_SSE_REG);
6724 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6728 return gen_rtx_REG (mode, AX_REG);
6732 ret = construct_container (mode, orig_mode, valtype, 1,
6733 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6734 x86_64_int_return_registers, 0);
6736 /* For zero sized structures, construct_container returns NULL, but we
6737 need to keep the rest of the compiler happy by returning a meaningful value. */
6739 ret = gen_rtx_REG (orig_mode, AX_REG);
6745 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6747 unsigned int regno = AX_REG;
6751 switch (GET_MODE_SIZE (mode))
6754 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6755 && !COMPLEX_MODE_P (mode))
6756 regno = FIRST_SSE_REG;
6760 if (mode == SFmode || mode == DFmode)
6761 regno = FIRST_SSE_REG;
6767 return gen_rtx_REG (orig_mode, regno);
6771 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6772 enum machine_mode orig_mode, enum machine_mode mode)
6774 const_tree fn, fntype;
6777 if (fntype_or_decl && DECL_P (fntype_or_decl))
6778 fn = fntype_or_decl;
6779 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6781 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6782 return function_value_ms_64 (orig_mode, mode);
6783 else if (TARGET_64BIT)
6784 return function_value_64 (orig_mode, mode, valtype);
6786 return function_value_32 (orig_mode, mode, fntype, fn);
6790 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6791 bool outgoing ATTRIBUTE_UNUSED)
6793 enum machine_mode mode, orig_mode;
6795 orig_mode = TYPE_MODE (valtype);
6796 mode = type_natural_mode (valtype, NULL);
6797 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6801 ix86_libcall_value (enum machine_mode mode)
6803 return ix86_function_value_1 (NULL, NULL, mode, mode);
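/* Editorial summary (illustrative, not exhaustive) of the 64-bit SysV
   return registers chosen above: a long returns in RAX, a double in
   XMM0, a long double in ST(0), and a small aggregate such as
   struct s { long l; double d; } comes back in the RAX/XMM0 pair via
   construct_container with in_return set.  */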
6806 /* Return true iff type is returned in memory. */
6808 static int ATTRIBUTE_UNUSED
6809 return_in_memory_32 (const_tree type, enum machine_mode mode)
6813 if (mode == BLKmode)
6816 size = int_size_in_bytes (type);
6818 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6821 if (VECTOR_MODE_P (mode) || mode == TImode)
6823 /* User-created vectors small enough to fit in EAX. */
6827 /* MMX/3dNow values are returned in MM0,
6828 except when it doesn't exist. */
6830 return (TARGET_MMX ? 0 : 1);
6832 /* SSE values are returned in XMM0, except when it doesn't exist. */
6834 return (TARGET_SSE ? 0 : 1);
6836 /* AVX values are returned in YMM0, except when it doesn't exist. */
6838 return TARGET_AVX ? 0 : 1;
6847 /* OImode shouldn't be used directly. */
6848 gcc_assert (mode != OImode);
6853 static int ATTRIBUTE_UNUSED
6854 return_in_memory_64 (const_tree type, enum machine_mode mode)
6856 int needed_intregs, needed_sseregs;
6857 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6860 static int ATTRIBUTE_UNUSED
6861 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6863 HOST_WIDE_INT size = int_size_in_bytes (type);
6865 /* __m128 is returned in xmm0. */
6866 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6867 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6870 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6871 return (size != 1 && size != 2 && size != 4 && size != 8);
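/* Editorial example (sketch): under the MS_ABI rules above, __m128 and
   other 16-byte vectors come back in XMM0, an 8-byte struct comes back
   in RAX, and a 3-byte struct fails the 1/2/4/8 size test and is
   returned through a hidden memory pointer.  */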
6875 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6877 #ifdef SUBTARGET_RETURN_IN_MEMORY
6878 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6880 const enum machine_mode mode = type_natural_mode (type, NULL);
6884 if (ix86_function_type_abi (fntype) == MS_ABI)
6885 return return_in_memory_ms_64 (type, mode);
6887 return return_in_memory_64 (type, mode);
6890 return return_in_memory_32 (type, mode);
6894 /* Return false iff TYPE is returned in memory. This version is used
6895 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6896 but differs notably in that when MMX is available, 8-byte vectors
6897 are returned in memory, rather than in MMX registers. */
6900 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6903 enum machine_mode mode = type_natural_mode (type, NULL);
6906 return return_in_memory_64 (type, mode);
6908 if (mode == BLKmode)
6911 size = int_size_in_bytes (type);
6913 if (VECTOR_MODE_P (mode))
6915 /* Return in memory only if MMX registers *are* available. This
6916 seems backwards, but it is consistent with the existing Solaris x86 ABI. */
6923 else if (mode == TImode)
6925 else if (mode == XFmode)
6931 /* When returning SSE vector types, we have a choice of either
6932 (1) being abi incompatible with a -march switch, or
6933 (2) generating an error.
6934 Given no good solution, I think the safest thing is one warning.
6935 The user won't be able to use -Werror, but....
6937 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6938 called in response to actually generating a caller or callee that
6939 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6940 via aggregate_value_p for general type probing from tree-ssa. */
6943 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6945 static bool warnedsse, warnedmmx;
6947 if (!TARGET_64BIT && type)
6949 /* Look at the return type of the function, not the function type. */
6950 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6952 if (!TARGET_SSE && !warnedsse)
6955 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6958 warning (0, "SSE vector return without SSE enabled "
6963 if (!TARGET_MMX && !warnedmmx)
6965 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6968 warning (0, "MMX vector return without MMX enabled "
6978 /* Create the va_list data type. */
6980 /* Returns the calling convention specific va_list data type.
6981 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6984 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6986 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6988 /* For i386 we use a plain pointer to the argument area. */
6989 if (!TARGET_64BIT || abi == MS_ABI)
6990 return build_pointer_type (char_type_node);
6992 record = lang_hooks.types.make_type (RECORD_TYPE);
6993 type_decl = build_decl (BUILTINS_LOCATION,
6994 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6996 f_gpr = build_decl (BUILTINS_LOCATION,
6997 FIELD_DECL, get_identifier ("gp_offset"),
6998 unsigned_type_node);
6999 f_fpr = build_decl (BUILTINS_LOCATION,
7000 FIELD_DECL, get_identifier ("fp_offset"),
7001 unsigned_type_node);
7002 f_ovf = build_decl (BUILTINS_LOCATION,
7003 FIELD_DECL, get_identifier ("overflow_arg_area"),
7005 f_sav = build_decl (BUILTINS_LOCATION,
7006 FIELD_DECL, get_identifier ("reg_save_area"),
7009 va_list_gpr_counter_field = f_gpr;
7010 va_list_fpr_counter_field = f_fpr;
7012 DECL_FIELD_CONTEXT (f_gpr) = record;
7013 DECL_FIELD_CONTEXT (f_fpr) = record;
7014 DECL_FIELD_CONTEXT (f_ovf) = record;
7015 DECL_FIELD_CONTEXT (f_sav) = record;
7017 TREE_CHAIN (record) = type_decl;
7018 TYPE_NAME (record) = type_decl;
7019 TYPE_FIELDS (record) = f_gpr;
7020 DECL_CHAIN (f_gpr) = f_fpr;
7021 DECL_CHAIN (f_fpr) = f_ovf;
7022 DECL_CHAIN (f_ovf) = f_sav;
7024 layout_type (record);
7026 /* The correct type is an array type of one element. */
7027 return build_array_type (record, build_index_type (size_zero_node));
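/* Editorial aside: the record built above corresponds to the familiar
   psABI definition of va_list on x86-64, shown here for orientation
   only (the real type is constructed through the tree machinery):

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];  */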
7030 /* Set up the builtin va_list data type and, for 64-bit, the additional
7031 calling-convention-specific va_list data types. */
7034 ix86_build_builtin_va_list (void)
7036 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7038 /* Initialize ABI-specific va_list builtin types. */
7042 if (ix86_abi == MS_ABI)
7044 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7045 if (TREE_CODE (t) != RECORD_TYPE)
7046 t = build_variant_type_copy (t);
7047 sysv_va_list_type_node = t;
7052 if (TREE_CODE (t) != RECORD_TYPE)
7053 t = build_variant_type_copy (t);
7054 sysv_va_list_type_node = t;
7056 if (ix86_abi != MS_ABI)
7058 t = ix86_build_builtin_va_list_abi (MS_ABI);
7059 if (TREE_CODE (t) != RECORD_TYPE)
7060 t = build_variant_type_copy (t);
7061 ms_va_list_type_node = t;
7066 if (TREE_CODE (t) != RECORD_TYPE)
7067 t = build_variant_type_copy (t);
7068 ms_va_list_type_node = t;
7075 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7078 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7084 /* GPR size of varargs save area. */
7085 if (cfun->va_list_gpr_size)
7086 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7088 ix86_varargs_gpr_size = 0;
7090 /* FPR size of varargs save area. We don't need it if we don't pass
7091 anything in SSE registers. */
7092 if (TARGET_SSE && cfun->va_list_fpr_size)
7093 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7095 ix86_varargs_fpr_size = 0;
7097 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7100 save_area = frame_pointer_rtx;
7101 set = get_varargs_alias_set ();
7103 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7104 if (max > X86_64_REGPARM_MAX)
7105 max = X86_64_REGPARM_MAX;
7107 for (i = cum->regno; i < max; i++)
7109 mem = gen_rtx_MEM (Pmode,
7110 plus_constant (save_area, i * UNITS_PER_WORD));
7111 MEM_NOTRAP_P (mem) = 1;
7112 set_mem_alias_set (mem, set);
7113 emit_move_insn (mem, gen_rtx_REG (Pmode,
7114 x86_64_int_parameter_registers[i]));
7117 if (ix86_varargs_fpr_size)
7119 enum machine_mode smode;
7122 /* Now emit code to save SSE registers.  The AX parameter contains the number
7123 of SSE parameter registers used to call this function, though all we
7124 actually check here is the zero/non-zero status. */
7126 label = gen_label_rtx ();
7127 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7128 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7131 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7132 we used movdqa (i.e. TImode) instead? Perhaps even better would
7133 be if we could determine the real mode of the data, via a hook
7134 into pass_stdarg. Ignore all that for now. */
7136 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7137 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7139 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7140 if (max > X86_64_SSE_REGPARM_MAX)
7141 max = X86_64_SSE_REGPARM_MAX;
7143 for (i = cum->sse_regno; i < max; ++i)
7145 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7146 mem = gen_rtx_MEM (smode, mem);
7147 MEM_NOTRAP_P (mem) = 1;
7148 set_mem_alias_set (mem, set);
7149 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7151 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7159 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7161 alias_set_type set = get_varargs_alias_set ();
7164 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7168 mem = gen_rtx_MEM (Pmode,
7169 plus_constant (virtual_incoming_args_rtx,
7170 i * UNITS_PER_WORD));
7171 MEM_NOTRAP_P (mem) = 1;
7172 set_mem_alias_set (mem, set);
7174 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7175 emit_move_insn (mem, reg);
7180 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7181 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7184 CUMULATIVE_ARGS next_cum;
7187 /* This argument doesn't appear to be used anymore. Which is good,
7188 because the old code here didn't suppress rtl generation. */
7189 gcc_assert (!no_rtl);
7194 fntype = TREE_TYPE (current_function_decl);
7196 /* For varargs, we do not want to skip the dummy va_dcl argument.
7197 For stdargs, we do want to skip the last named argument. */
7199 if (stdarg_p (fntype))
7200 ix86_function_arg_advance (&next_cum, mode, type, true);
7202 if (cum->call_abi == MS_ABI)
7203 setup_incoming_varargs_ms_64 (&next_cum);
7205 setup_incoming_varargs_64 (&next_cum);
7208 /* Check whether TYPE is a va_list of the plain char * kind. */
7211 is_va_list_char_pointer (tree type)
7215 /* For 32-bit it is always true. */
7218 canonic = ix86_canonical_va_list_type (type);
7219 return (canonic == ms_va_list_type_node
7220 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7223 /* Implement va_start. */
7226 ix86_va_start (tree valist, rtx nextarg)
7228 HOST_WIDE_INT words, n_gpr, n_fpr;
7229 tree f_gpr, f_fpr, f_ovf, f_sav;
7230 tree gpr, fpr, ovf, sav, t;
7233 /* Only the 64-bit target needs anything special. */
7234 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7236 std_expand_builtin_va_start (valist, nextarg);
7240 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7241 f_fpr = DECL_CHAIN (f_gpr);
7242 f_ovf = DECL_CHAIN (f_fpr);
7243 f_sav = DECL_CHAIN (f_ovf);
7245 valist = build_simple_mem_ref (valist);
7246 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7247 /* The following should be folded into the MEM_REF offset. */
7248 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7250 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7252 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7254 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7257 /* Count number of gp and fp argument registers used. */
7258 words = crtl->args.info.words;
7259 n_gpr = crtl->args.info.regno;
7260 n_fpr = crtl->args.info.sse_regno;
7262 if (cfun->va_list_gpr_size)
7264 type = TREE_TYPE (gpr);
7265 t = build2 (MODIFY_EXPR, type,
7266 gpr, build_int_cst (type, n_gpr * 8));
7267 TREE_SIDE_EFFECTS (t) = 1;
7268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7271 if (TARGET_SSE && cfun->va_list_fpr_size)
7273 type = TREE_TYPE (fpr);
7274 t = build2 (MODIFY_EXPR, type, fpr,
7275 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7276 TREE_SIDE_EFFECTS (t) = 1;
7277 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7280 /* Find the overflow area. */
7281 type = TREE_TYPE (ovf);
7282 t = make_tree (type, crtl->args.internal_arg_pointer);
7284 t = build2 (POINTER_PLUS_EXPR, type, t,
7285 size_int (words * UNITS_PER_WORD));
7286 t = build2 (MODIFY_EXPR, type, ovf, t);
7287 TREE_SIDE_EFFECTS (t) = 1;
7288 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7290 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7292 /* Find the register save area.
7293 The function prologue saves it right above the stack frame. */
7294 type = TREE_TYPE (sav);
7295 t = make_tree (type, frame_pointer_rtx);
7296 if (!ix86_varargs_gpr_size)
7297 t = build2 (POINTER_PLUS_EXPR, type, t,
7298 size_int (-8 * X86_64_REGPARM_MAX));
7299 t = build2 (MODIFY_EXPR, type, sav, t);
7300 TREE_SIDE_EFFECTS (t) = 1;
7301 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
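/* Editorial example (a sketch, assuming the usual six integer argument
   registers): for void f (int n, ...) that consumed one GPR and no SSE
   registers before the ellipsis, the stores above amount to

     ap->gp_offset = 8;                   n_gpr * 8
     ap->fp_offset = 48;                  8 * X86_64_REGPARM_MAX
     ap->overflow_arg_area = <first stack argument>;
     ap->reg_save_area = <area saved by the prologue>;  */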
7305 /* Implement va_arg. */
7308 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7311 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7312 tree f_gpr, f_fpr, f_ovf, f_sav;
7313 tree gpr, fpr, ovf, sav, t;
7315 tree lab_false, lab_over = NULL_TREE;
7320 enum machine_mode nat_mode;
7321 unsigned int arg_boundary;
7323 /* Only the 64-bit target needs anything special. */
7324 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7325 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7327 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7328 f_fpr = DECL_CHAIN (f_gpr);
7329 f_ovf = DECL_CHAIN (f_fpr);
7330 f_sav = DECL_CHAIN (f_ovf);
7332 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7333 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7334 valist = build_va_arg_indirect_ref (valist);
7335 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7336 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7337 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7339 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7341 type = build_pointer_type (type);
7342 size = int_size_in_bytes (type);
7343 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7345 nat_mode = type_natural_mode (type, NULL);
7354 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7355 if (ix86_cfun_abi () == SYSV_ABI)
7362 container = construct_container (nat_mode, TYPE_MODE (type),
7363 type, 0, X86_64_REGPARM_MAX,
7364 X86_64_SSE_REGPARM_MAX, intreg,
7369 /* Pull the value out of the saved registers. */
7371 addr = create_tmp_var (ptr_type_node, "addr");
7375 int needed_intregs, needed_sseregs;
7377 tree int_addr, sse_addr;
7379 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7380 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7382 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7384 need_temp = (!REG_P (container)
7385 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7386 || TYPE_ALIGN (type) > 128));
7388 /* When passing a structure, verify that it occupies a consecutive block
7389 of the register save area.  If not, we need to do moves. */
7390 if (!need_temp && !REG_P (container))
7392 /* Verify that all registers are strictly consecutive. */
7393 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7397 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7399 rtx slot = XVECEXP (container, 0, i);
7400 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7401 || INTVAL (XEXP (slot, 1)) != i * 16)
7409 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7411 rtx slot = XVECEXP (container, 0, i);
7412 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7413 || INTVAL (XEXP (slot, 1)) != i * 8)
7425 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7426 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7429 /* First ensure that we fit completely in registers. */
7432 t = build_int_cst (TREE_TYPE (gpr),
7433 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7434 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7435 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7436 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7437 gimplify_and_add (t, pre_p);
7441 t = build_int_cst (TREE_TYPE (fpr),
7442 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7443 + X86_64_REGPARM_MAX * 8);
7444 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7445 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7446 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7447 gimplify_and_add (t, pre_p);
7450 /* Compute index to start of area used for integer regs. */
7453 /* int_addr = gpr + sav; */
7454 t = fold_convert (sizetype, gpr);
7455 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7456 gimplify_assign (int_addr, t, pre_p);
7460 /* sse_addr = fpr + sav; */
7461 t = fold_convert (sizetype, fpr);
7462 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7463 gimplify_assign (sse_addr, t, pre_p);
7467 int i, prev_size = 0;
7468 tree temp = create_tmp_var (type, "va_arg_tmp");
7471 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7472 gimplify_assign (addr, t, pre_p);
7474 for (i = 0; i < XVECLEN (container, 0); i++)
7476 rtx slot = XVECEXP (container, 0, i);
7477 rtx reg = XEXP (slot, 0);
7478 enum machine_mode mode = GET_MODE (reg);
7484 tree dest_addr, dest;
7485 int cur_size = GET_MODE_SIZE (mode);
7487 if (prev_size + cur_size > size)
7489 cur_size = size - prev_size;
7490 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7491 if (mode == BLKmode)
7494 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7495 if (mode == GET_MODE (reg))
7496 addr_type = build_pointer_type (piece_type);
7498 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7500 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7503 if (SSE_REGNO_P (REGNO (reg)))
7505 src_addr = sse_addr;
7506 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7510 src_addr = int_addr;
7511 src_offset = REGNO (reg) * 8;
7513 src_addr = fold_convert (addr_type, src_addr);
7514 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7515 size_int (src_offset));
7517 dest_addr = fold_convert (daddr_type, addr);
7518 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7519 size_int (INTVAL (XEXP (slot, 1))));
7520 if (cur_size == GET_MODE_SIZE (mode))
7522 src = build_va_arg_indirect_ref (src_addr);
7523 dest = build_va_arg_indirect_ref (dest_addr);
7525 gimplify_assign (dest, src, pre_p);
7530 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7531 3, dest_addr, src_addr,
7532 size_int (cur_size));
7533 gimplify_and_add (copy, pre_p);
7535 prev_size += cur_size;
7541 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7542 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7543 gimplify_assign (gpr, t, pre_p);
7548 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7549 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7550 gimplify_assign (fpr, t, pre_p);
7553 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7555 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7558 /* ... otherwise out of the overflow area. */
7560 /* When the caller aligns a parameter on the stack, any alignment
7561 beyond MAX_SUPPORTED_STACK_ALIGNMENT is capped at
7562 MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee here with the
7563 caller. */
7564 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7565 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7566 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7568 /* Care for on-stack alignment if needed. */
7569 if (arg_boundary <= 64 || size == 0)
7573 HOST_WIDE_INT align = arg_boundary / 8;
7574 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7575 size_int (align - 1));
7576 t = fold_convert (sizetype, t);
7577 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7579 t = fold_convert (TREE_TYPE (ovf), t);
7582 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7583 gimplify_assign (addr, t, pre_p);
7585 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7586 size_int (rsize * UNITS_PER_WORD));
7587 gimplify_assign (unshare_expr (ovf), t, pre_p);
7590 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7592 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7593 addr = fold_convert (ptrtype, addr);
7596 addr = build_va_arg_indirect_ref (addr);
7597 return build_va_arg_indirect_ref (addr);
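/* Editorial sketch of what the gimplification above expands
   va_arg (ap, int) into -- pseudo-C under the usual SysV assumptions,
   for orientation only:

     if (ap->gp_offset < 48)
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }
     else
       {
         addr = ap->overflow_arg_area;
         ap->overflow_arg_area += 8;
       }
     result = *(int *) addr;  */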
7600 /* Return nonzero if OPNUM's MEM should be matched
7601 in movabs* patterns. */
7604 ix86_check_movabs (rtx insn, int opnum)
7608 set = PATTERN (insn);
7609 if (GET_CODE (set) == PARALLEL)
7610 set = XVECEXP (set, 0, 0);
7611 gcc_assert (GET_CODE (set) == SET);
7612 mem = XEXP (set, opnum);
7613 while (GET_CODE (mem) == SUBREG)
7614 mem = SUBREG_REG (mem);
7615 gcc_assert (MEM_P (mem));
7616 return (volatile_ok || !MEM_VOLATILE_P (mem));
7619 /* Initialize the table of extra 80387 mathematical constants. */
7622 init_ext_80387_constants (void)
7624 static const char * cst[5] =
7626 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7627 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7628 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7629 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7630 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7634 for (i = 0; i < 5; i++)
7636 real_from_string (&ext_80387_constants_table[i], cst[i]);
7637 /* Ensure each constant is rounded to XFmode precision. */
7638 real_convert (&ext_80387_constants_table[i],
7639 XFmode, &ext_80387_constants_table[i]);
7642 ext_80387_constants_init = 1;
7645 /* Return true if the constant is something that can be loaded with
7646 a special instruction. */
7649 standard_80387_constant_p (rtx x)
7651 enum machine_mode mode = GET_MODE (x);
7655 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7658 if (x == CONST0_RTX (mode))
7660 if (x == CONST1_RTX (mode))
7663 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7665 /* For XFmode constants, try to find a special 80387 instruction when
7666 optimizing for size or on those CPUs that benefit from them. */
7668 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7672 if (! ext_80387_constants_init)
7673 init_ext_80387_constants ();
7675 for (i = 0; i < 5; i++)
7676 if (real_identical (&r, &ext_80387_constants_table[i]))
7680 /* A load of the constant -0.0 or -1.0 will be split into an
7681 fldz;fchs or fld1;fchs sequence. */
7682 if (real_isnegzero (&r))
7684 if (real_identical (&r, &dconstm1))
7690 /* Return the opcode of the special instruction to be used to load the constant X. */
7694 standard_80387_constant_opcode (rtx x)
7696 switch (standard_80387_constant_p (x))
7720 /* Return the CONST_DOUBLE representing the 80387 constant that is
7721 loaded by the specified special instruction. The argument IDX
7722 matches the return value from standard_80387_constant_p. */
7725 standard_80387_constant_rtx (int idx)
7729 if (! ext_80387_constants_init)
7730 init_ext_80387_constants ();
7746 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
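/* Editorial note (a sketch): the positive results of
   standard_80387_constant_p select single-instruction loads -- fldz
   for 0.0, fld1 for 1.0, and, when the extended table is enabled,
   fldlg2, fldln2, fldl2e, fldl2t and fldpi for the corresponding
   XFmode constants.  */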
7750 /* Return 1 if X is all zeros and 2 if X is all ones,
7751 in a supported SSE vector mode. */
7754 standard_sse_constant_p (rtx x)
7756 enum machine_mode mode = GET_MODE (x);
7758 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7760 if (vector_all_ones_operand (x, mode))
7776 /* Return the opcode of the special instruction to be used to load the constant X. */
7780 standard_sse_constant_opcode (rtx insn, rtx x)
7782 switch (standard_sse_constant_p (x))
7785 switch (get_attr_mode (insn))
7788 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7790 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7791 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7793 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7795 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7796 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7798 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7800 return "vxorps\t%x0, %x0, %x0";
7802 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7803 return "vxorps\t%x0, %x0, %x0";
7805 return "vxorpd\t%x0, %x0, %x0";
7807 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7808 return "vxorps\t%x0, %x0, %x0";
7810 return "vpxor\t%x0, %x0, %x0";
7815 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
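/* Editorial example (illustrative assembly): the idioms chosen above
   are the classic dependency-free ways to materialize SSE constants:

     xorps   %xmm0, %xmm0      all zeros
     pcmpeqd %xmm0, %xmm0      all ones

   with the v-prefixed three-operand forms used when AVX is enabled.  */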
7822 /* Return 1 if OP contains a symbol reference. */
7825 symbolic_reference_mentioned_p (rtx op)
7830 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7833 fmt = GET_RTX_FORMAT (GET_CODE (op));
7834 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7840 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7841 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7845 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7852 /* Return 1 if it is appropriate to emit `ret' instructions in the
7853 body of a function. Do this only if the epilogue is simple, needing a
7854 couple of insns. Prior to reloading, we can't tell how many registers
7855 must be saved, so return 0 then. Return 0 if there is no frame
7856 marker to de-allocate. */
7859 ix86_can_use_return_insn_p (void)
7861 struct ix86_frame frame;
7863 if (! reload_completed || frame_pointer_needed)
7866 /* Don't allow more than 32k bytes of arguments to be popped, since
7867 that's all we can do with one instruction. */
7868 if (crtl->args.pops_args
7869 && crtl->args.size >= 32768)
7872 ix86_compute_frame_layout (&frame);
7873 return frame.to_allocate == 0 && frame.padding0 == 0
7874 && (frame.nregs + frame.nsseregs) == 0;
7877 /* Value should be nonzero if functions must have frame pointers.
7878 Zero means the frame pointer need not be set up (and parms may
7879 be accessed via the stack pointer) in functions that seem suitable. */
7882 ix86_frame_pointer_required (void)
7884 /* If we accessed previous frames, then the generated code expects
7885 to be able to access the saved ebp value in our frame. */
7886 if (cfun->machine->accesses_prev_frame)
7889 /* Several x86 OSes need a frame pointer for other reasons,
7890 usually pertaining to setjmp. */
7891 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7894 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7895 the frame pointer by default. Turn it back on now if we've not
7896 got a leaf function. */
7897 if (TARGET_OMIT_LEAF_FRAME_POINTER
7898 && (!current_function_is_leaf
7899 || ix86_current_function_calls_tls_descriptor))
7902 if (crtl->profile && !flag_fentry)
7908 /* Record that the current function accesses previous call frames. */
7911 ix86_setup_frame_addresses (void)
7913 cfun->machine->accesses_prev_frame = 1;
7916 #ifndef USE_HIDDEN_LINKONCE
7917 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7918 # define USE_HIDDEN_LINKONCE 1
7920 # define USE_HIDDEN_LINKONCE 0
7924 static int pic_labels_used;
7926 /* Fills in the label name that should be used for a pc thunk for
7927 the given register. */
7930 get_pc_thunk_name (char name[32], unsigned int regno)
7932 gcc_assert (!TARGET_64BIT);
7934 if (USE_HIDDEN_LINKONCE)
7935 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7937 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7941 /* This function generates code for -fpic that loads %ebx with
7942 the return address of the caller and then returns. */
7945 ix86_code_end (void)
7950 for (regno = 0; regno < 8; ++regno)
7955 if (! ((pic_labels_used >> regno) & 1))
7958 get_pc_thunk_name (name, regno);
7960 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7961 get_identifier (name),
7962 build_function_type (void_type_node, void_list_node));
7963 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7964 NULL_TREE, void_type_node);
7965 TREE_PUBLIC (decl) = 1;
7966 TREE_STATIC (decl) = 1;
7971 switch_to_section (darwin_sections[text_coal_section]);
7972 fputs ("\t.weak_definition\t", asm_out_file);
7973 assemble_name (asm_out_file, name);
7974 fputs ("\n\t.private_extern\t", asm_out_file);
7975 assemble_name (asm_out_file, name);
7976 putc ('\n', asm_out_file);
7977 ASM_OUTPUT_LABEL (asm_out_file, name);
7978 DECL_WEAK (decl) = 1;
7982 if (USE_HIDDEN_LINKONCE)
7984 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7986 targetm.asm_out.unique_section (decl, 0);
7987 switch_to_section (get_named_section (decl, NULL, 0));
7989 targetm.asm_out.globalize_label (asm_out_file, name);
7990 fputs ("\t.hidden\t", asm_out_file);
7991 assemble_name (asm_out_file, name);
7992 putc ('\n', asm_out_file);
7993 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7997 switch_to_section (text_section);
7998 ASM_OUTPUT_LABEL (asm_out_file, name);
8001 DECL_INITIAL (decl) = make_node (BLOCK);
8002 current_function_decl = decl;
8003 init_function_start (decl);
8004 first_function_block_is_cold = false;
8005 /* Make sure unwind info is emitted for the thunk if needed. */
8006 final_start_function (emit_barrier (), asm_out_file, 1);
8008 xops[0] = gen_rtx_REG (Pmode, regno);
8009 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8010 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8011 output_asm_insn ("ret", xops);
8012 final_end_function ();
8013 init_insn_lengths ();
8014 free_after_compilation (cfun);
8016 current_function_decl = NULL;
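/* Editorial sketch: with USE_HIDDEN_LINKONCE, the thunk emitted above
   for %ebx comes out roughly as

     __i686.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret

   i.e. it copies its own return address -- the caller's PC -- into
   the requested register.  */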
8020 /* Emit code for the SET_GOT patterns. */
8023 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8029 if (TARGET_VXWORKS_RTP && flag_pic)
8031 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8032 xops[2] = gen_rtx_MEM (Pmode,
8033 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8034 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8036 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8037 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8038 an unadorned address. */
8039 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8040 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8041 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8045 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8047 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8049 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8052 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8055 output_asm_insn ("call\t%a2", xops);
8056 #ifdef DWARF2_UNWIND_INFO
8057 /* The call to the next label acts as a push. */
8058 if (dwarf2out_do_frame ())
8062 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8063 gen_rtx_PLUS (Pmode,
8066 RTX_FRAME_RELATED_P (insn) = 1;
8067 dwarf2out_frame_debug (insn, true);
8074 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8075 is what will be referenced by the Mach-O PIC subsystem. */
8077 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8080 targetm.asm_out.internal_label (asm_out_file, "L",
8081 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8085 output_asm_insn ("pop%z0\t%0", xops);
8086 #ifdef DWARF2_UNWIND_INFO
8087 /* The pop clobbers DEST, but does not count as restoring it
8088 for unwind info purposes. */
8089 if (dwarf2out_do_frame ())
8093 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8094 dwarf2out_frame_debug (insn, true);
8095 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8096 gen_rtx_PLUS (Pmode,
8099 RTX_FRAME_RELATED_P (insn) = 1;
8100 dwarf2out_frame_debug (insn, true);
8109 get_pc_thunk_name (name, REGNO (dest));
8110 pic_labels_used |= 1 << REGNO (dest);
8112 #ifdef DWARF2_UNWIND_INFO
/* Ensure all queued register saves are flushed before the call.  */
8115 if (dwarf2out_do_frame ())
8119 insn = emit_barrier ();
8121 dwarf2out_frame_debug (insn, false);
8124 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8125 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8126 output_asm_insn ("call\t%X2", xops);
8127 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8128 is what will be referenced by the Mach-O PIC subsystem. */
8131 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8133 targetm.asm_out.internal_label (asm_out_file, "L",
8134 CODE_LABEL_NUMBER (label));
8141 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8142 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8144 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
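  /* For illustration only: on the classic 32-bit PIC path above (no deep
     branch prediction, DEST == %ebx), the emitted sequence looks roughly
     like

	call	1f
     1:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

     leaving the GOT address in %ebx.  */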
/* Generate a "push" pattern for input ARG.  */
8154 if (ix86_cfa_state->reg == stack_pointer_rtx)
8155 ix86_cfa_state->offset += UNITS_PER_WORD;
8157 return gen_rtx_SET (VOIDmode,
8159 gen_rtx_PRE_DEC (Pmode,
8160 stack_pointer_rtx)),
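/* An illustrative note: on a 32-bit target the pattern built above is RTL
   of the form (set (mem:SI (pre_dec:SI (reg:SI sp))) arg), which the
   machine description matches as a single push instruction.  */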
8164 /* Return >= 0 if there is an unused call-clobbered register available
8165 for the entire function. */
8168 ix86_select_alt_pic_regnum (void)
8170 if (current_function_is_leaf
8172 && !ix86_current_function_calls_tls_descriptor)
8175 /* Can't use the same register for both PIC and DRAP. */
8177 drap = REGNO (crtl->drap_reg);
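      /* Illustrative note: in this port's register numbering, hard regs
	 0, 1 and 2 are %eax, %edx and %ecx, so the loop below scans just
	 that call-clobbered trio.  */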
8180 for (i = 2; i >= 0; --i)
8181 if (i != drap && !df_regs_ever_live_p (i))
8185 return INVALID_REGNUM;
8188 /* Return 1 if we need to save REGNO. */
8190 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8192 if (pic_offset_table_rtx
8193 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8194 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8196 || crtl->calls_eh_return
8197 || crtl->uses_const_pool))
8199 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8204 if (crtl->calls_eh_return && maybe_eh_return)
8209 unsigned test = EH_RETURN_DATA_REGNO (i);
8210 if (test == INVALID_REGNUM)
8217 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8220 return (df_regs_ever_live_p (regno)
8221 && !call_used_regs[regno]
8222 && !fixed_regs[regno]
8223 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return the number of saved general purpose registers.  */
8229 ix86_nsaved_regs (void)
8234 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8235 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Return the number of saved SSE registers.  */
8243 ix86_nsaved_sseregs (void)
8248 if (ix86_cfun_abi () != MS_ABI)
8250 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8251 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8256 /* Given FROM and TO register numbers, say whether this elimination is
8257 allowed. If stack alignment is needed, we can only replace argument
8258 pointer with hard frame pointer, or replace frame pointer with stack
8259 pointer. Otherwise, frame pointer elimination is automatically
8260 handled and all other eliminations are valid. */
8263 ix86_can_eliminate (const int from, const int to)
8265 if (stack_realign_fp)
8266 return ((from == ARG_POINTER_REGNUM
8267 && to == HARD_FRAME_POINTER_REGNUM)
8268 || (from == FRAME_POINTER_REGNUM
8269 && to == STACK_POINTER_REGNUM));
8271 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8274 /* Return the offset between two registers, one to be eliminated, and the other
8275 its replacement, at the start of a routine. */
8278 ix86_initial_elimination_offset (int from, int to)
8280 struct ix86_frame frame;
8281 ix86_compute_frame_layout (&frame);
8283 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8284 return frame.hard_frame_pointer_offset;
8285 else if (from == FRAME_POINTER_REGNUM
8286 && to == HARD_FRAME_POINTER_REGNUM)
8287 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8290 gcc_assert (to == STACK_POINTER_REGNUM);
8292 if (from == ARG_POINTER_REGNUM)
8293 return frame.stack_pointer_offset;
8295 gcc_assert (from == FRAME_POINTER_REGNUM);
8296 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8300 /* In a dynamically-aligned function, we can't know the offset from
8301 stack pointer to frame pointer, so we must ensure that setjmp
8302 eliminates fp against the hard fp (%ebp) rather than trying to
8303 index from %esp up to the top of the frame across a gap that is
8304 of unknown (at compile-time) size. */
8306 ix86_builtin_setjmp_frame_value (void)
8308 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
/* Fill the structure ix86_frame with info about the frame of the current function.  */
8314 ix86_compute_frame_layout (struct ix86_frame *frame)
8316 unsigned int stack_alignment_needed;
8317 HOST_WIDE_INT offset;
8318 unsigned int preferred_alignment;
8319 HOST_WIDE_INT size = get_frame_size ();
8321 frame->nregs = ix86_nsaved_regs ();
8322 frame->nsseregs = ix86_nsaved_sseregs ();
8324 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8325 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* The MS ABI seems to require stack alignment to always be 16, except for
   function prologues and leaf functions.  */
8329 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8330 && (!current_function_is_leaf || cfun->calls_alloca != 0
8331 || ix86_current_function_calls_tls_descriptor))
8333 preferred_alignment = 16;
8334 stack_alignment_needed = 16;
8335 crtl->preferred_stack_boundary = 128;
8336 crtl->stack_alignment_needed = 128;
8339 gcc_assert (!size || stack_alignment_needed);
8340 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8341 gcc_assert (preferred_alignment <= stack_alignment_needed);
/* During reload iteration the number of registers saved can change.
   Recompute the value as needed.  Do not recompute when the number of
   registers didn't change, as reload does multiple calls to the function
   and does not expect the decision to change within a single iteration.  */
8347 if (!optimize_function_for_size_p (cfun)
8348 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8350 int count = frame->nregs;
8351 struct cgraph_node *node = cgraph_node (current_function_decl);
8353 cfun->machine->use_fast_prologue_epilogue_nregs = count;
      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster, as modern hardware
	 can execute the moves in parallel but can't do that for push/pop.

	 Be careful about choosing which prologue to emit: when the function
	 takes many instructions to execute, we may use the slow version, as
	 well as when the function is known to be outside a hot spot (this is
	 known only with profile feedback).  Weight the size of the function
	 by the number of registers to save, as it is cheap to use one or two
	 push instructions but very slow to use many of them.  */
8365 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8366 if (node->frequency < NODE_FREQUENCY_NORMAL
8367 || (flag_branch_probabilities
8368 && node->frequency < NODE_FREQUENCY_HOT))
8369 cfun->machine->use_fast_prologue_epilogue = false;
8371 cfun->machine->use_fast_prologue_epilogue
8372 = !expensive_function_p (count);
8374 if (TARGET_PROLOGUE_USING_MOVE
8375 && cfun->machine->use_fast_prologue_epilogue)
8376 frame->save_regs_using_mov = true;
8378 frame->save_regs_using_mov = false;
8380 /* If static stack checking is enabled and done with probes, the registers
8381 need to be saved before allocating the frame. */
8382 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8383 frame->save_regs_using_mov = false;
8385 /* Skip return address. */
8386 offset = UNITS_PER_WORD;
8388 /* Skip pushed static chain. */
8389 if (ix86_static_chain_on_stack)
8390 offset += UNITS_PER_WORD;
8392 /* Skip saved base pointer. */
8393 if (frame_pointer_needed)
8394 offset += UNITS_PER_WORD;
8396 frame->hard_frame_pointer_offset = offset;
  /* Set offset to aligned because the realigned frame starts from it.  */
  if (stack_realign_fp)
    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8403 /* Register save area */
8404 offset += frame->nregs * UNITS_PER_WORD;
8406 /* Align SSE reg save area. */
8407 if (frame->nsseregs)
8408 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8410 frame->padding0 = 0;
8412 /* SSE register save area. */
8413 offset += frame->padding0 + frame->nsseregs * 16;
8416 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8417 offset += frame->va_arg_size;
8419 /* Align start of frame for local function. */
8420 frame->padding1 = ((offset + stack_alignment_needed - 1)
8421 & -stack_alignment_needed) - offset;
8423 offset += frame->padding1;
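  /* Worked example (illustrative): with offset == 20 and
     stack_alignment_needed == 16, padding1 == ((20 + 15) & -16) - 20
     == 32 - 20 == 12, so the local area starts at offset 32.  */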
8425 /* Frame pointer points here. */
8426 frame->frame_pointer_offset = offset;
  /* Add the outgoing arguments area.  It can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca.  The
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
8435 if (ACCUMULATE_OUTGOING_ARGS
8436 && (!current_function_is_leaf || cfun->calls_alloca
8437 || ix86_current_function_calls_tls_descriptor))
8439 offset += crtl->outgoing_args_size;
8440 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8443 frame->outgoing_arguments_size = 0;
  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
8447 if (!current_function_is_leaf || cfun->calls_alloca
8448 || ix86_current_function_calls_tls_descriptor)
8449 frame->padding2 = ((offset + preferred_alignment - 1)
8450 & -preferred_alignment) - offset;
8452 frame->padding2 = 0;
8454 offset += frame->padding2;
  /* We've reached the end of the stack frame.  */
8457 frame->stack_pointer_offset = offset;
8459 /* Size prologue needs to allocate. */
8460 frame->to_allocate =
8461 (size + frame->padding1 + frame->padding2
8462 + frame->outgoing_arguments_size + frame->va_arg_size);
8464 if ((!frame->to_allocate && frame->nregs <= 1)
8465 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8466 frame->save_regs_using_mov = false;
8468 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8469 && current_function_sp_is_unchanging
8470 && current_function_is_leaf
8471 && !ix86_current_function_calls_tls_descriptor)
8473 frame->red_zone_size = frame->to_allocate;
8474 if (frame->save_regs_using_mov)
8475 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8476 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8477 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
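      /* Illustrative note: with the usual x86-64 values RED_ZONE_SIZE == 128
	 and RED_ZONE_RESERVE == 8, at most 120 bytes of the frame end up in
	 the red zone.  */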
8480 frame->red_zone_size = 0;
8481 frame->to_allocate -= frame->red_zone_size;
8482 frame->stack_pointer_offset -= frame->red_zone_size;
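/* Informal sketch of the layout computed above (higher addresses first;
   bracketed items are conditional):

	return address				<- offset 0
	[pushed static chain]
	[saved frame pointer]			<- hard_frame_pointer_offset
	saved integer registers
	[padding0] + SSE register save area
	va_arg register save area
	[padding1] + local variables		<- frame_pointer_offset
	[outgoing arguments area]
	[padding2]				<- stack_pointer_offset  */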
8485 /* Emit code to save registers in the prologue. */
8488 ix86_emit_save_regs (void)
8493 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8494 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8496 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8497 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit code to save registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
8504 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8509 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8510 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8512 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8514 gen_rtx_REG (Pmode, regno));
8515 RTX_FRAME_RELATED_P (insn) = 1;
8516 offset += UNITS_PER_WORD;
/* Emit code to save SSE registers using MOV insns.  The first register
   is saved at POINTER + OFFSET.  */
8523 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8529 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8530 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8532 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8533 set_mem_align (mem, 128);
8534 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8535 RTX_FRAME_RELATED_P (insn) = 1;
8540 static GTY(()) rtx queued_cfa_restores;
/* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
   manipulation insn.  Don't add the note if the previously saved value will
   be left untouched within the stack red zone until return, as unwinders can
   find the same value in the register and on the stack.  */
8549 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8552 && !TARGET_64BIT_MS_ABI
8553 && red_offset + RED_ZONE_SIZE >= 0
8554 && crtl->args.pops_args < 65536)
8559 add_reg_note (insn, REG_CFA_RESTORE, reg);
8560 RTX_FRAME_RELATED_P (insn) = 1;
8564 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8567 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8570 ix86_add_queued_cfa_restore_notes (rtx insn)
8573 if (!queued_cfa_restores)
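  /* Splice the queue onto INSN: walk to the last queued note, chain
     INSN's existing notes after it, then install the queue as the new
     head of INSN's note list.  */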
8575 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8577 XEXP (last, 1) = REG_NOTES (insn);
8578 REG_NOTES (insn) = queued_cfa_restores;
8579 queued_cfa_restores = NULL_RTX;
8580 RTX_FRAME_RELATED_P (insn) = 1;
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
8590 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8591 int style, bool set_cfa)
8596 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8597 else if (x86_64_immediate_operand (offset, DImode))
8598 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8602 /* r11 is used by indirect sibcall return as well, set before the
8603 epilogue and used after the epilogue. */
8605 tmp = gen_rtx_REG (DImode, R11_REG);
8608 gcc_assert (src != hard_frame_pointer_rtx
8609 && dest != hard_frame_pointer_rtx);
8610 tmp = hard_frame_pointer_rtx;
8612 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8614 RTX_FRAME_RELATED_P (insn) = 1;
8615 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8620 ix86_add_queued_cfa_restore_notes (insn);
8626 gcc_assert (ix86_cfa_state->reg == src);
8627 ix86_cfa_state->offset += INTVAL (offset);
8628 ix86_cfa_state->reg = dest;
8630 r = gen_rtx_PLUS (Pmode, src, offset);
8631 r = gen_rtx_SET (VOIDmode, dest, r);
8632 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8633 RTX_FRAME_RELATED_P (insn) = 1;
8636 RTX_FRAME_RELATED_P (insn) = 1;
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be a parameter passing
   register.

   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a longer
   encoding.

   Return: the regno of the chosen register.  */
8651 find_drap_reg (void)
8653 tree decl = cfun->decl;
  /* Use R13 for nested functions or functions that need a static chain.
     Since a function with a tail call may use any caller-saved register
     in the epilogue, DRAP must not use a caller-saved register in that
     case.  */
8661 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
  /* Use DI for nested functions or functions that need a static chain.
     Since a function with a tail call may use any caller-saved register
     in the epilogue, DRAP must not use a caller-saved register in that
     case.  */
8672 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
  /* Reuse the static chain register if it isn't used for parameter passing.  */
8677 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8678 && !lookup_attribute ("fastcall",
8679 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8680 && !lookup_attribute ("thiscall",
8681 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8688 /* Return minimum incoming stack alignment. */
8691 ix86_minimum_incoming_stack_boundary (bool sibcall)
8693 unsigned int incoming_stack_boundary;
8695 /* Prefer the one specified at command line. */
8696 if (ix86_user_incoming_stack_boundary)
8697 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, this isn't a sibcall check, and the
     estimated stack alignment is 128 bits.  */
8703 && ix86_force_align_arg_pointer
8704 && crtl->stack_alignment_estimated == 128)
8705 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8707 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8709 /* Incoming stack alignment can be changed on individual functions
8710 via force_align_arg_pointer attribute. We use the smallest
8711 incoming stack boundary. */
8712 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8713 && lookup_attribute (ix86_force_align_arg_pointer_string,
8714 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8715 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8717 /* The incoming stack frame has to be aligned at least at
8718 parm_stack_boundary. */
8719 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8720 incoming_stack_boundary = crtl->parm_stack_boundary;
  /* The stack at the entry of main is aligned by the runtime.  We use the
     smallest incoming stack boundary.  */
8724 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8725 && DECL_NAME (current_function_decl)
8726 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8727 && DECL_FILE_SCOPE_P (current_function_decl))
8728 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8730 return incoming_stack_boundary;
8733 /* Update incoming stack boundary and estimated stack alignment. */
8736 ix86_update_stack_boundary (void)
8738 ix86_incoming_stack_boundary
8739 = ix86_minimum_incoming_stack_boundary (false);
  /* x86_64 varargs need 16-byte stack alignment for the register save area.  */
8745 && crtl->stack_alignment_estimated < 128)
8746 crtl->stack_alignment_estimated = 128;
8749 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8750 needed or an rtx for DRAP otherwise. */
8753 ix86_get_drap_rtx (void)
8755 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8756 crtl->need_drap = true;
8758 if (stack_realign_drap)
      /* Assign DRAP to vDRAP and return vDRAP.  */
8761 unsigned int regno = find_drap_reg ();
8766 arg_ptr = gen_rtx_REG (Pmode, regno);
8767 crtl->drap_reg = arg_ptr;
8770 drap_vreg = copy_to_reg (arg_ptr);
8774 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8777 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8778 RTX_FRAME_RELATED_P (insn) = 1;
8786 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8789 ix86_internal_arg_pointer (void)
8791 return virtual_incoming_args_rtx;
8794 struct scratch_reg {
8799 /* Return a short-lived scratch register for use on function entry.
8800 In 32-bit mode, it is valid only after the registers are saved
8801 in the prologue. This register must be released by means of
8802 release_scratch_register_on_entry once it is dead. */
8805 get_scratch_register_on_entry (struct scratch_reg *sr)
8813 /* We always use R11 in 64-bit mode. */
8818 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8820 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8821 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8822 int regparm = ix86_function_regparm (fntype, decl);
8824 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8826 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8827 for the static chain register. */
8828 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8829 && drap_regno != AX_REG)
8831 else if (regparm < 2 && drap_regno != DX_REG)
8833 /* ecx is the static chain register. */
8834 else if (regparm < 3 && !fastcall_p && !static_chain_p
8835 && drap_regno != CX_REG)
8837 else if (ix86_save_reg (BX_REG, true))
8839 /* esi is the static chain register. */
8840 else if (!(regparm == 3 && static_chain_p)
8841 && ix86_save_reg (SI_REG, true))
8843 else if (ix86_save_reg (DI_REG, true))
8847 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8852 sr->reg = gen_rtx_REG (Pmode, regno);
8855 rtx insn = emit_insn (gen_push (sr->reg));
8856 RTX_FRAME_RELATED_P (insn) = 1;
8860 /* Release a scratch register obtained from the preceding function. */
8863 release_scratch_register_on_entry (struct scratch_reg *sr)
8867 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
8869 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8870 RTX_FRAME_RELATED_P (insn) = 1;
8871 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
8872 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
8873 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8877 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
8879 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
8882 ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
  /* We skip the probe for the first interval + a small dope of 4 words and
     probe that many bytes past the specified size to maintain a protection
     area at the bottom of the stack.  */
8887 const int dope = 4 * UNITS_PER_WORD;
8888 rtx size_rtx = GEN_INT (size);
8890 /* See if we have a constant small number of probes to generate. If so,
8891 that's the easy case. The run-time loop is made up of 11 insns in the
8892 generic case while the compile-time loop is made up of 3+2*(n-1) insns
8893 for n # of intervals. */
8894 if (size <= 5 * PROBE_INTERVAL)
8896 HOST_WIDE_INT i, adjust;
8897 bool first_probe = true;
8899 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
8900 values of N from 1 until it exceeds SIZE. If only one probe is
8901 needed, this will not generate any code. Then adjust and probe
8902 to PROBE_INTERVAL + SIZE. */
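      /* Worked example (illustrative, assuming PROBE_INTERVAL == 4096 on a
	 64-bit target, so dope == 32): for size == 10000, SP is adjusted by
	 8224, 4096 and 1808 bytes, with a probe after each step, and then
	 re-adjusted upward by 4128 bytes, for a net allocation of exactly
	 10000 bytes.  */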
8903 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
8907 adjust = 2 * PROBE_INTERVAL + dope;
8908 first_probe = false;
8911 adjust = PROBE_INTERVAL;
8913 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8914 plus_constant (stack_pointer_rtx, -adjust)));
8915 emit_stack_probe (stack_pointer_rtx);
8919 adjust = size + PROBE_INTERVAL + dope;
8921 adjust = size + PROBE_INTERVAL - i;
8923 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8924 plus_constant (stack_pointer_rtx, -adjust)));
8925 emit_stack_probe (stack_pointer_rtx);
8927 /* Adjust back to account for the additional first interval. */
8928 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8929 plus_constant (stack_pointer_rtx,
8930 PROBE_INTERVAL + dope)));
8933 /* Otherwise, do the same as above, but in a loop. Note that we must be
8934 extra careful with variables wrapping around because we might be at
8935 the very top (or the very bottom) of the address space and we have
8936 to be able to handle this case properly; in particular, we use an
8937 equality test for the loop condition. */
8940 HOST_WIDE_INT rounded_size;
8941 struct scratch_reg sr;
8943 get_scratch_register_on_entry (&sr);
8946 /* Step 1: round SIZE to the previous multiple of the interval. */
8948 rounded_size = size & -PROBE_INTERVAL;
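      /* E.g. (illustrative) size == 10000 with PROBE_INTERVAL == 4096 gives
	 rounded_size == 8192; the remaining 1808 bytes are handled by
	 Step 4 below.  */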
8951 /* Step 2: compute initial and final value of the loop counter. */
8953 /* SP = SP_0 + PROBE_INTERVAL. */
8954 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8955 plus_constant (stack_pointer_rtx,
8956 - (PROBE_INTERVAL + dope))));
8958 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
8959 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8960 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
8961 gen_rtx_PLUS (Pmode, sr.reg,
8962 stack_pointer_rtx)));
	 while (SP != LAST_ADDR)
	   {
	     SP = SP + PROBE_INTERVAL
	     probe at SP
	   }

	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
8976 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
8979 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
8980 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
8982 if (size != rounded_size)
8984 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8985 plus_constant (stack_pointer_rtx,
8986 rounded_size - size)));
8987 emit_stack_probe (stack_pointer_rtx);
8990 /* Adjust back to account for the additional first interval. */
8991 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8992 plus_constant (stack_pointer_rtx,
8993 PROBE_INTERVAL + dope)));
8995 release_scratch_register_on_entry (&sr);
8998 gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
9000 /* Make sure nothing is scheduled before we are done. */
9001 emit_insn (gen_blockage ());
9004 /* Adjust the stack pointer up to REG while probing it. */
9007 output_adjust_stack_and_probe (rtx reg)
9009 static int labelno = 0;
9010 char loop_lab[32], end_lab[32];
9013 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9014 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9016 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9018 /* Jump to END_LAB if SP == LAST_ADDR. */
9019 xops[0] = stack_pointer_rtx;
9021 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9022 fputs ("\tje\t", asm_out_file);
9023 assemble_name_raw (asm_out_file, end_lab);
9024 fputc ('\n', asm_out_file);
9026 /* SP = SP + PROBE_INTERVAL. */
9027 xops[1] = GEN_INT (PROBE_INTERVAL);
9028 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
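  /* Probe at SP: or-ing 0 into (SP) leaves the word unchanged but forces
     a write to the page, faulting in a guard page if necessary.  */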
9031 xops[1] = const0_rtx;
9032 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9034 fprintf (asm_out_file, "\tjmp\t");
9035 assemble_name_raw (asm_out_file, loop_lab);
9036 fputc ('\n', asm_out_file);
9038 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9043 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9044 inclusive. These are offsets from the current stack pointer. */
9047 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 7 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
9053 if (size <= 7 * PROBE_INTERVAL)
9057 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9058 it exceeds SIZE. If only one probe is needed, this will not
9059 generate any code. Then probe at FIRST + SIZE. */
9060 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9061 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9063 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
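      /* E.g. (illustrative) with PROBE_INTERVAL == 4096, first == 0 and
	 size == 10000, this emits probes at sp - 4096, sp - 8192 and
	 sp - 10000.  */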
9066 /* Otherwise, do the same as above, but in a loop. Note that we must be
9067 extra careful with variables wrapping around because we might be at
9068 the very top (or the very bottom) of the address space and we have
9069 to be able to handle this case properly; in particular, we use an
9070 equality test for the loop condition. */
9073 HOST_WIDE_INT rounded_size, last;
9074 struct scratch_reg sr;
9076 get_scratch_register_on_entry (&sr);
9079 /* Step 1: round SIZE to the previous multiple of the interval. */
9081 rounded_size = size & -PROBE_INTERVAL;
9084 /* Step 2: compute initial and final value of the loop counter. */
9086 /* TEST_OFFSET = FIRST. */
9087 emit_move_insn (sr.reg, GEN_INT (-first));
9089 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9090 last = first + rounded_size;
	 while (TEST_ADDR != LAST_ADDR)
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */
9104 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9107 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9108 that SIZE is equal to ROUNDED_SIZE. */
9110 if (size != rounded_size)
9111 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9114 rounded_size - size));
9116 release_scratch_register_on_entry (&sr);
9119 /* Make sure nothing is scheduled before we are done. */
9120 emit_insn (gen_blockage ());
9123 /* Probe a range of stack addresses from REG to END, inclusive. These are
9124 offsets from the current stack pointer. */
9127 output_probe_stack_range (rtx reg, rtx end)
9129 static int labelno = 0;
9130 char loop_lab[32], end_lab[32];
9133 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9134 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9136 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9138 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9141 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9142 fputs ("\tje\t", asm_out_file);
9143 assemble_name_raw (asm_out_file, end_lab);
9144 fputc ('\n', asm_out_file);
9146 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9147 xops[1] = GEN_INT (PROBE_INTERVAL);
9148 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9150 /* Probe at TEST_ADDR. */
9151 xops[0] = stack_pointer_rtx;
9153 xops[2] = const0_rtx;
9154 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9156 fprintf (asm_out_file, "\tjmp\t");
9157 assemble_name_raw (asm_out_file, loop_lab);
9158 fputc ('\n', asm_out_file);
9160 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
/* Finalize the stack_realign_needed flag, which will guide the
   prologue/epilogue to be generated in the correct form.  */
9168 ix86_finalize_stack_realign_flags (void)
  /* Check if stack realignment is really needed after reload, and
     store the result in cfun.  */
9172 unsigned int incoming_stack_boundary
9173 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9174 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9175 unsigned int stack_realign = (incoming_stack_boundary
9176 < (current_function_is_leaf
9177 ? crtl->max_used_stack_slot_alignment
9178 : crtl->stack_alignment_needed));
9180 if (crtl->stack_realign_finalized)
      /* After stack_realign_needed is finalized, we can no longer update it.  */
9184 gcc_assert (crtl->stack_realign_needed == stack_realign);
9188 crtl->stack_realign_needed = stack_realign;
9189 crtl->stack_realign_finalized = true;
9193 /* Expand the prologue into a bunch of separate insns. */
9196 ix86_expand_prologue (void)
9200 struct ix86_frame frame;
9201 HOST_WIDE_INT allocate;
9202 int gen_frame_pointer = frame_pointer_needed;
9204 ix86_finalize_stack_realign_flags ();
  /* DRAP should not coexist with stack_realign_fp.  */
9207 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9209 /* Initialize CFA state for before the prologue. */
9210 ix86_cfa_state->reg = stack_pointer_rtx;
9211 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
9213 ix86_compute_frame_layout (&frame);
9215 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
      /* Check whether profiling is active and we shall use the
	 profiling-before-prologue variant.  If so, issue a sorry.  */
9221 if (crtl->profile && flag_fentry != 0)
9222 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
      /* Make sure the function starts with
	   8b ff	movl.s %edi,%edi  (emitted by ix86_asm_output_function_label)
	   55		push   %ebp
	   8b ec	movl.s %esp,%ebp
9229 This matches the hookable function prologue in Win32 API
9230 functions in Microsoft Windows XP Service Pack 2 and newer.
9231 Wine uses this to enable Windows apps to hook the Win32 API
9232 functions provided by Wine. */
9233 push = emit_insn (gen_push (hard_frame_pointer_rtx));
9234 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
9235 stack_pointer_rtx));
9237 if (frame_pointer_needed && !(crtl->drap_reg
9238 && crtl->stack_realign_needed))
9240 /* The push %ebp and movl.s %esp, %ebp already set up
9241 the frame pointer. No need to do this again. */
9242 gen_frame_pointer = 0;
9243 RTX_FRAME_RELATED_P (push) = 1;
9244 RTX_FRAME_RELATED_P (mov) = 1;
9245 if (ix86_cfa_state->reg == stack_pointer_rtx)
9246 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9249 /* If the frame pointer is not needed, pop %ebp again. This
9250 could be optimized for cases where ebp needs to be backed up
9251 for some other reason. If stack realignment is needed, pop
9252 the base pointer again, align the stack, and later regenerate
9253 the frame pointer setup. The frame pointer generated by the
9254 hook prologue is not aligned, so it can't be used. */
9255 insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9258 /* The first insn of a function that accepts its static chain on the
9259 stack is to push the register that would be filled in by a direct
9260 call. This insn will be skipped by the trampoline. */
9261 if (ix86_static_chain_on_stack)
9265 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9266 emit_insn (gen_blockage ());
9268 /* We don't want to interpret this push insn as a register save,
9269 only as a stack adjustment. The real copy of the register as
9270 a save will be done later, if needed. */
9271 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9272 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9273 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9274 RTX_FRAME_RELATED_P (insn) = 1;
  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
     DRAP is needed and stack realignment is really needed after reload.  */
9279 if (crtl->drap_reg && crtl->stack_realign_needed)
9282 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9283 int param_ptr_offset = UNITS_PER_WORD;
9285 if (ix86_static_chain_on_stack)
9286 param_ptr_offset += UNITS_PER_WORD;
9287 if (!call_used_regs[REGNO (crtl->drap_reg)])
9288 param_ptr_offset += UNITS_PER_WORD;
9290 gcc_assert (stack_realign_drap);
9292 /* Grab the argument pointer. */
9293 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
      /* Only need to push the parameter pointer reg if it is caller saved.  */
9298 if (!call_used_regs[REGNO (crtl->drap_reg)])
9300 /* Push arg pointer reg */
9301 insn = emit_insn (gen_push (y));
9302 RTX_FRAME_RELATED_P (insn) = 1;
9305 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
9306 RTX_FRAME_RELATED_P (insn) = 1;
9307 ix86_cfa_state->reg = crtl->drap_reg;
9309 /* Align the stack. */
9310 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9312 GEN_INT (-align_bytes)));
9313 RTX_FRAME_RELATED_P (insn) = 1;
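      /* E.g. (illustrative) align_bytes == 16 makes this emit
	 "and $-16, %esp" (or %rsp), forcing 16-byte stack alignment.  */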
9315 /* Replicate the return address on the stack so that return
9316 address can be reached via (argp - 1) slot. This is needed
9317 to implement macro RETURN_ADDR_RTX and intrinsic function
9318 expand_builtin_return_addr etc. */
9320 x = gen_frame_mem (Pmode,
9321 plus_constant (x, -UNITS_PER_WORD));
9322 insn = emit_insn (gen_push (x));
9323 RTX_FRAME_RELATED_P (insn) = 1;
9326 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9327 slower on all targets. Also sdb doesn't like it. */
9329 if (gen_frame_pointer)
9331 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9332 RTX_FRAME_RELATED_P (insn) = 1;
9334 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9335 RTX_FRAME_RELATED_P (insn) = 1;
9337 if (ix86_cfa_state->reg == stack_pointer_rtx)
9338 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9341 if (stack_realign_fp)
9343 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9344 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9346 /* Align the stack. */
9347 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9349 GEN_INT (-align_bytes)));
9350 RTX_FRAME_RELATED_P (insn) = 1;
9353 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
9355 if (!frame.save_regs_using_mov)
9356 ix86_emit_save_regs ();
9358 allocate += frame.nregs * UNITS_PER_WORD;
9360 /* The stack has already been decremented by the instruction calling us
9361 so we need to probe unconditionally to preserve the protection area. */
9362 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9364 /* We expect the registers to be saved when probes are used. */
9365 gcc_assert (!frame.save_regs_using_mov);
9367 if (STACK_CHECK_MOVING_SP)
9369 ix86_adjust_stack_and_probe (allocate);
9374 HOST_WIDE_INT size = allocate;
9376 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9377 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9379 if (TARGET_STACK_PROBE)
9380 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9382 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
  /* When using the red zone we may start register saving before allocating
     the stack frame, saving one cycle of the prologue.  However, avoid doing
     this if we are going to have to probe the stack, since at least on x86_64
     the stack probe can turn into a call that clobbers a red zone location.  */
9391 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
9392 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
9393 ix86_emit_save_regs_using_mov ((frame_pointer_needed
9394 && !crtl->stack_realign_needed)
9395 ? hard_frame_pointer_rtx
9396 : stack_pointer_rtx,
9397 -frame.nregs * UNITS_PER_WORD);
9401 else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
9402 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9403 GEN_INT (-allocate), -1,
9404 ix86_cfa_state->reg == stack_pointer_rtx);
9407 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9411 if (cfun->machine->call_abi == MS_ABI)
9414 eax_live = ix86_eax_live_at_start_p ();
9418 emit_insn (gen_push (eax));
9419 allocate -= UNITS_PER_WORD;
9422 emit_move_insn (eax, GEN_INT (allocate));
9424 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9426 if (ix86_cfa_state->reg == stack_pointer_rtx)
9428 ix86_cfa_state->offset += allocate;
9429 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9430 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9431 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9432 RTX_FRAME_RELATED_P (insn) = 1;
9437 if (frame_pointer_needed)
9438 t = plus_constant (hard_frame_pointer_rtx,
9441 - frame.nregs * UNITS_PER_WORD);
9443 t = plus_constant (stack_pointer_rtx, allocate);
9444 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
9448 if (frame.save_regs_using_mov
9449 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
9450 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
9452 if (!frame_pointer_needed
9453 || !(frame.to_allocate + frame.padding0)
9454 || crtl->stack_realign_needed)
9455 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
9457 + frame.nsseregs * 16 + frame.padding0);
9459 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
9460 -frame.nregs * UNITS_PER_WORD);
9462 if (!frame_pointer_needed
9463 || !(frame.to_allocate + frame.padding0)
9464 || crtl->stack_realign_needed)
9465 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
9468 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
9469 - frame.nregs * UNITS_PER_WORD
9470 - frame.nsseregs * 16
9473 pic_reg_used = false;
9474 if (pic_offset_table_rtx
9475 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9478 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9480 if (alt_pic_reg_used != INVALID_REGNUM)
9481 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9483 pic_reg_used = true;
9490 if (ix86_cmodel == CM_LARGE_PIC)
9492 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9493 rtx label = gen_label_rtx ();
9495 LABEL_PRESERVE_P (label) = 1;
9496 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9497 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9498 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9499 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9500 pic_offset_table_rtx, tmp_reg));
9503 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9506 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
  /* In the pic_reg_used case, make sure that the GOT load isn't deleted
     when mcount needs it.  A blockage to avoid call movement across the
     mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
     note.  */
9514 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9516 if (crtl->drap_reg && !crtl->stack_realign_needed)
      /* vDRAP is set up, but after reload it turns out stack realignment
	 isn't necessary; here we emit the prologue to set up DRAP
	 without the stack realignment adjustment.  */
9522 int drap_bp_offset = UNITS_PER_WORD * 2;
9524 if (ix86_static_chain_on_stack)
9525 drap_bp_offset += UNITS_PER_WORD;
9526 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
9527 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
9530 /* Prevent instructions from being scheduled into register save push
9531 sequence when access to the redzone area is done through frame pointer.
9532 The offset between the frame pointer and the stack pointer is calculated
9533 relative to the value of the stack pointer at the end of the function
9534 prologue, and moving instructions that access redzone area via frame
9535 pointer inside push sequence violates this assumption. */
9536 if (frame_pointer_needed && frame.red_zone_size)
9537 emit_insn (gen_memory_blockage ());
9539 /* Emit cld instruction if stringops are used in the function. */
9540 if (TARGET_CLD && ix86_current_function_needs_cld)
9541 emit_insn (gen_cld ());
9544 /* Emit code to restore REG using a POP insn. */
9547 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
9549 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9551 if (ix86_cfa_state->reg == crtl->drap_reg
9552 && REGNO (reg) == REGNO (crtl->drap_reg))
9554 /* Previously we'd represented the CFA as an expression
9555 like *(%ebp - 8). We've just popped that value from
9556 the stack, which means we need to reset the CFA to
9557 the drap register. This will remain until we restore
9558 the stack pointer. */
9559 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9560 RTX_FRAME_RELATED_P (insn) = 1;
9564 if (ix86_cfa_state->reg == stack_pointer_rtx)
9566 ix86_cfa_state->offset -= UNITS_PER_WORD;
9567 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9568 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9569 RTX_FRAME_RELATED_P (insn) = 1;
9572 /* When the frame pointer is the CFA, and we pop it, we are
9573 swapping back to the stack pointer as the CFA. This happens
9574 for stack frames that don't allocate other data, so we assume
9575 the stack pointer is now pointing at the return address, i.e.
9576 the function entry state, which makes the offset be 1 word. */
9577 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
9578 && reg == hard_frame_pointer_rtx)
9580 ix86_cfa_state->reg = stack_pointer_rtx;
9581 ix86_cfa_state->offset -= UNITS_PER_WORD;
9583 add_reg_note (insn, REG_CFA_DEF_CFA,
9584 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9585 GEN_INT (ix86_cfa_state->offset)));
9586 RTX_FRAME_RELATED_P (insn) = 1;
9589 ix86_add_cfa_restore_note (insn, reg, red_offset);
9592 /* Emit code to restore saved registers using POP insns. */
9595 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
9599 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9600 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9602 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
9604 red_offset += UNITS_PER_WORD;
9608 /* Emit code and notes for the LEAVE instruction. */
9611 ix86_emit_leave (HOST_WIDE_INT red_offset)
9613 rtx insn = emit_insn (ix86_gen_leave ());
9615 ix86_add_queued_cfa_restore_notes (insn);
9617 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
9619 ix86_cfa_state->reg = stack_pointer_rtx;
9620 ix86_cfa_state->offset -= UNITS_PER_WORD;
9622 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9623 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
9624 RTX_FRAME_RELATED_P (insn) = 1;
9625 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
9629 /* Emit code to restore saved registers using MOV insns. First register
9630 is restored from POINTER + OFFSET. */
9632 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9633 HOST_WIDE_INT red_offset,
9634 int maybe_eh_return)
9637 rtx base_address = gen_rtx_MEM (Pmode, pointer);
9640 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9641 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9643 rtx reg = gen_rtx_REG (Pmode, regno);
	/* Ensure that adjust_address won't be forced to produce a pointer
	   out of the range allowed by the x86-64 instruction set.  */
9647 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9651 r11 = gen_rtx_REG (DImode, R11_REG);
9652 emit_move_insn (r11, GEN_INT (offset));
9653 emit_insn (gen_adddi3 (r11, r11, pointer));
9654 base_address = gen_rtx_MEM (Pmode, r11);
9657 insn = emit_move_insn (reg,
9658 adjust_address (base_address, Pmode, offset));
9659 offset += UNITS_PER_WORD;
9661 if (ix86_cfa_state->reg == crtl->drap_reg
9662 && regno == REGNO (crtl->drap_reg))
9664 /* Previously we'd represented the CFA as an expression
9665 like *(%ebp - 8). We've just popped that value from
9666 the stack, which means we need to reset the CFA to
9667 the drap register. This will remain until we restore
9668 the stack pointer. */
9669 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9670 RTX_FRAME_RELATED_P (insn) = 1;
9673 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9675 red_offset += UNITS_PER_WORD;
/* Emit code to restore saved SSE registers using MOV insns.  The first
   register is restored from POINTER + OFFSET.  */
9682 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9683 HOST_WIDE_INT red_offset,
9684 int maybe_eh_return)
9687 rtx base_address = gen_rtx_MEM (TImode, pointer);
9690 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9691 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9693 rtx reg = gen_rtx_REG (TImode, regno);
	/* Ensure that adjust_address won't be forced to produce a pointer
	   out of the range allowed by the x86-64 instruction set.  */
9697 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9701 r11 = gen_rtx_REG (DImode, R11_REG);
9702 emit_move_insn (r11, GEN_INT (offset));
9703 emit_insn (gen_adddi3 (r11, r11, pointer));
9704 base_address = gen_rtx_MEM (TImode, r11);
9707 mem = adjust_address (base_address, TImode, offset);
9708 set_mem_align (mem, 128);
9709 emit_move_insn (reg, mem);
9712 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9718 /* Restore function stack, frame, and registers. */
9721 ix86_expand_epilogue (int style)
9724 struct ix86_frame frame;
9725 HOST_WIDE_INT offset, red_offset;
9726 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9729 ix86_finalize_stack_realign_flags ();
9731 /* When stack is realigned, SP must be valid. */
9732 sp_valid = (!frame_pointer_needed
9733 || current_function_sp_is_unchanging
9734 || stack_realign_fp);
9736 ix86_compute_frame_layout (&frame);
9738 /* See the comment about red zone and frame
9739 pointer usage in ix86_expand_prologue. */
9740 if (frame_pointer_needed && frame.red_zone_size)
9741 emit_insn (gen_memory_blockage ());
9743 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9744 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9746 /* Calculate start of saved registers relative to ebp. Special care
9747 must be taken for the normal return case of a function using
9748 eh_return: the eax and edx registers are marked as saved, but not
9749 restored along this path. */
9750 offset = frame.nregs;
9751 if (crtl->calls_eh_return && style != 2)
9753 offset *= -UNITS_PER_WORD;
9754 offset -= frame.nsseregs * 16 + frame.padding0;
9756 /* Calculate start of saved registers relative to esp on entry of the
9757 function. When realigning stack, this needs to be the most negative
9758 value possible at runtime. */
9759 red_offset = offset;
9761 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9763 else if (stack_realign_fp)
9764 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9766 if (ix86_static_chain_on_stack)
9767 red_offset -= UNITS_PER_WORD;
9768 if (frame_pointer_needed)
9769 red_offset -= UNITS_PER_WORD;
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or
     discrete equivalent), so it is profitable in some other cases as
     well.  Especially when there are no registers to restore.  We also
     use this code when TARGET_USE_LEAVE and there is exactly one
     register to pop.  This heuristic may need some tuning in future.  */
9781 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9782 || (TARGET_EPILOGUE_USING_MOVE
9783 && cfun->machine->use_fast_prologue_epilogue
9784 && ((frame.nregs + frame.nsseregs) > 1
9785 || (frame.to_allocate + frame.padding0) != 0))
9786 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9787 && (frame.to_allocate + frame.padding0) != 0)
9788 || (frame_pointer_needed && TARGET_USE_LEAVE
9789 && cfun->machine->use_fast_prologue_epilogue
9790 && (frame.nregs + frame.nsseregs) == 1)
9791 || crtl->calls_eh_return)
      /* Restore registers.  We can use ebp or esp to address the memory
	 locations.  If both are available, default to ebp, since offsets
	 are known to be small.  The only exception is esp pointing directly
	 to the end of the block of saved registers, where we may simplify
	 the addressing mode.

	 If we are realigning the stack with bp and sp, the register restores
	 can't be addressed by bp; sp must be used instead.  */
9802 if (!frame_pointer_needed
9803 || (sp_valid && !(frame.to_allocate + frame.padding0))
9804 || stack_realign_fp)
9806 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9807 frame.to_allocate, red_offset,
9809 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9811 + frame.nsseregs * 16
9814 + frame.nsseregs * 16
9815 + frame.padding0, style == 2);
9819 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9822 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9824 + frame.nsseregs * 16
9827 + frame.nsseregs * 16
9828 + frame.padding0, style == 2);
9831 red_offset -= offset;
9833 /* eh_return epilogues need %ecx added to the stack pointer. */
9836 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9838 /* Stack align doesn't work with eh_return. */
9839 gcc_assert (!crtl->stack_realign_needed);
      /* Neither do regparm nested functions.  */
9841 gcc_assert (!ix86_static_chain_on_stack);
9843 if (frame_pointer_needed)
9845 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9846 tmp = plus_constant (tmp, UNITS_PER_WORD);
9847 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9849 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9850 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
	  /* Note that we use SA as a temporary CFA, as the return
	     address is at the proper place relative to it.  We
	     pretend this happens at the FP restore insn because
	     prior to this insn the FP would be stored at the wrong
	     offset relative to SA, and after this insn we have no
	     other reasonable register to use for the CFA.  We don't
	     bother resetting the CFA to the SP for the duration of
	     the return insn.  */
9861 plus_constant (sa, UNITS_PER_WORD));
9862 ix86_add_queued_cfa_restore_notes (tmp);
9863 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9864 RTX_FRAME_RELATED_P (tmp) = 1;
9865 ix86_cfa_state->reg = sa;
9866 ix86_cfa_state->offset = UNITS_PER_WORD;
9868 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9869 const0_rtx, style, false);
9873 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9874 tmp = plus_constant (tmp, (frame.to_allocate
9875 + frame.nregs * UNITS_PER_WORD
9876 + frame.nsseregs * 16
9878 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9879 ix86_add_queued_cfa_restore_notes (tmp);
9881 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9882 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9884 ix86_cfa_state->offset = UNITS_PER_WORD;
9885 add_reg_note (tmp, REG_CFA_DEF_CFA,
9886 plus_constant (stack_pointer_rtx,
9888 RTX_FRAME_RELATED_P (tmp) = 1;
9892 else if (!frame_pointer_needed)
9893 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9894 GEN_INT (frame.to_allocate
9895 + frame.nregs * UNITS_PER_WORD
9896 + frame.nsseregs * 16
9898 style, !using_drap);
9899 /* If not an i386, mov & pop is faster than "leave". */
9900 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9901 || !cfun->machine->use_fast_prologue_epilogue)
9902 ix86_emit_leave (red_offset);
9905 pro_epilogue_adjust_stack (stack_pointer_rtx,
9906 hard_frame_pointer_rtx,
9907 const0_rtx, style, !using_drap);
9909 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
      /* The first step is to deallocate the stack frame so that we can
	 restore the registers with plain POP insns.

	 If we realign the stack with the frame pointer, then the stack
	 pointer can't be recovered via lea $offset(%bp), %sp, because
	 there is a padding area between bp and sp for the realignment.
	 "add $to_allocate, %sp" must be used instead.  */
9923 gcc_assert (frame_pointer_needed);
9924 gcc_assert (!stack_realign_fp);
9925 pro_epilogue_adjust_stack (stack_pointer_rtx,
9926 hard_frame_pointer_rtx,
9927 GEN_INT (offset), style, false);
9928 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9931 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9932 GEN_INT (frame.nsseregs * 16
9936 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9938 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9939 frame.to_allocate, red_offset,
9941 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9942 GEN_INT (frame.to_allocate
9943 + frame.nsseregs * 16
9944 + frame.padding0), style,
9945 !using_drap && !frame_pointer_needed);
9948 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9950 red_offset -= offset;
9952 if (frame_pointer_needed)
9954 /* Leave results in shorter dependency chains on CPUs that are
9955 able to grok it fast. */
9956 if (TARGET_USE_LEAVE)
9957 ix86_emit_leave (red_offset);
	      /* When stack realignment really happens, recovering the
		 stack pointer from the hard frame pointer is a must if
		 we are not using leave.  */
9963 if (stack_realign_fp)
9964 pro_epilogue_adjust_stack (stack_pointer_rtx,
9965 hard_frame_pointer_rtx,
9966 const0_rtx, style, !using_drap);
9967 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9975 int param_ptr_offset = UNITS_PER_WORD;
9978 gcc_assert (stack_realign_drap);
9980 if (ix86_static_chain_on_stack)
9981 param_ptr_offset += UNITS_PER_WORD;
9982 if (!call_used_regs[REGNO (crtl->drap_reg)])
9983 param_ptr_offset += UNITS_PER_WORD;
9985 insn = emit_insn (gen_rtx_SET
9986 (VOIDmode, stack_pointer_rtx,
9987 gen_rtx_PLUS (Pmode,
9989 GEN_INT (-param_ptr_offset))));
9990 ix86_cfa_state->reg = stack_pointer_rtx;
9991 ix86_cfa_state->offset = param_ptr_offset;
9993 add_reg_note (insn, REG_CFA_DEF_CFA,
9994 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9995 GEN_INT (ix86_cfa_state->offset)));
9996 RTX_FRAME_RELATED_P (insn) = 1;
9998 if (!call_used_regs[REGNO (crtl->drap_reg)])
9999 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
10002 /* Remove the saved static chain from the stack. The use of ECX is
10003 merely as a scratch register, not as the actual static chain. */
10004 if (ix86_static_chain_on_stack)
10008 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
10009 ix86_cfa_state->offset += UNITS_PER_WORD;
10011 r = gen_rtx_REG (Pmode, CX_REG);
10012 insn = emit_insn (ix86_gen_pop1 (r));
10014 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10015 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
10016 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10017 RTX_FRAME_RELATED_P (insn) = 1;
10020 /* Sibcall epilogues don't want a return instruction. */
10023 *ix86_cfa_state = cfa_state_save;
10027 if (crtl->args.pops_args && crtl->args.size)
10029 rtx popc = GEN_INT (crtl->args.pops_args);
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
	 address, do an explicit add, and jump indirectly to the caller.  */
10034 if (crtl->args.pops_args >= 65536)
10036 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10039 /* There is no "pascal" calling convention in any 64bit ABI. */
10040 gcc_assert (!TARGET_64BIT);
10042 insn = emit_insn (gen_popsi1 (ecx));
10043 ix86_cfa_state->offset -= UNITS_PER_WORD;
10045 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10046 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10047 add_reg_note (insn, REG_CFA_REGISTER,
10048 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10049 RTX_FRAME_RELATED_P (insn) = 1;
10051 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10053 emit_jump_insn (gen_return_indirect_internal (ecx));
10056 emit_jump_insn (gen_return_pop_internal (popc));
10059 emit_jump_insn (gen_return_internal ());
10061 /* Restore the state back to the state from the prologue,
10062 so that it's correct for the next epilogue. */
10063 *ix86_cfa_state = cfa_state_save;
10066 /* Reset from the function's potential modifications. */
10069 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10070 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10072 if (pic_offset_table_rtx)
10073 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10075 /* Mach-O doesn't support labels at the end of objects, so if
10076 it looks like we might want one, insert a NOP. */
10078 rtx insn = get_last_insn ();
10081 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10082 insn = PREV_INSN (insn);
10086 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10087 fputs ("\tnop\n", file);
10093 /* Extract the parts of an RTL expression that is a valid memory address
10094 for an instruction. Return 0 if the structure of the address is
10095 grossly off. Return -1 if the address contains ASHIFT, so it is not
10096 strictly valid, but still used for computing the length of an lea instruction. */
10099 ix86_decompose_address (rtx addr, struct ix86_address *out)
10101 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10102 rtx base_reg, index_reg;
10103 HOST_WIDE_INT scale = 1;
10104 rtx scale_rtx = NULL_RTX;
10107 enum ix86_address_seg seg = SEG_DEFAULT;
10109 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10111 else if (GET_CODE (addr) == PLUS)
10113 rtx addends[4], op;
10121 addends[n++] = XEXP (op, 1);
10124 while (GET_CODE (op) == PLUS);
10129 for (i = n; i >= 0; --i)
10132 switch (GET_CODE (op))
10137 index = XEXP (op, 0);
10138 scale_rtx = XEXP (op, 1);
10144 index = XEXP (op, 0);
10145 tmp = XEXP (op, 1);
10146 if (!CONST_INT_P (tmp))
10148 scale = INTVAL (tmp);
10149 if ((unsigned HOST_WIDE_INT) scale > 3)
10151 scale = 1 << scale;
10155 if (XINT (op, 1) == UNSPEC_TP
10156 && TARGET_TLS_DIRECT_SEG_REFS
10157 && seg == SEG_DEFAULT)
10158 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10187 else if (GET_CODE (addr) == MULT)
10189 index = XEXP (addr, 0); /* index*scale */
10190 scale_rtx = XEXP (addr, 1);
10192 else if (GET_CODE (addr) == ASHIFT)
10194 /* We're called for lea too, which implements ashift on occasion. */
10195 index = XEXP (addr, 0);
10196 tmp = XEXP (addr, 1);
10197 if (!CONST_INT_P (tmp))
10199 scale = INTVAL (tmp);
10200 if ((unsigned HOST_WIDE_INT) scale > 3)
10202 scale = 1 << scale;
10206 disp = addr; /* displacement */
10208 /* Extract the integral value of scale. */
10211 if (!CONST_INT_P (scale_rtx))
10213 scale = INTVAL (scale_rtx);
10216 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10217 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10219 /* Avoid useless 0 displacement. */
10220 if (disp == const0_rtx && (base || index))
10223 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10224 if (base_reg && index_reg && scale == 1
10225 && (index_reg == arg_pointer_rtx
10226 || index_reg == frame_pointer_rtx
10227 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10230 tmp = base, base = index, index = tmp;
10231 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10234 /* Special case: %ebp cannot be encoded as a base without a displacement.
10238 && (base_reg == hard_frame_pointer_rtx
10239 || base_reg == frame_pointer_rtx
10240 || base_reg == arg_pointer_rtx
10241 || (REG_P (base_reg)
10242 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10243 || REGNO (base_reg) == R13_REG))))
10246 /* Special case: on K6, [%esi] causes the instruction to be vector
10247 decoded. Avoid this by transforming it to [%esi+0].
10248 Reload calls address legitimization without cfun defined, so we need
10249 to test cfun for being non-NULL. */
10250 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10251 && base_reg && !index_reg && !disp
10252 && REG_P (base_reg)
10253 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
10256 /* Special case: encode reg+reg instead of reg*2. */
10257 if (!base && index && scale == 2)
10258 base = index, base_reg = index_reg, scale = 1;
10260 /* Special case: scaling cannot be encoded without base or displacement. */
10261 if (!base && !disp && index && scale != 1)
10265 out->index = index;
10267 out->scale = scale;
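/* For illustration only -- a sketch of how a caller might consume the
   decomposition above; the helper below is hypothetical and kept out of
   the build. E.g. (plus (reg %ebx) (plus (mult (reg %esi) (const_int 4))
   (const_int 8))) decomposes into base = %ebx, index = %esi, scale = 4,
   disp = 8, i.e. the AT&T form 8(%ebx,%esi,4).  */
#if 0
static bool
example_address_uses_two_regs (rtx addr)
{
  struct ix86_address parts;

  /* A result <= 0 means ADDR is not a strictly valid address.  */
  if (ix86_decompose_address (addr, &parts) <= 0)
    return false;
  return parts.base != NULL_RTX && parts.index != NULL_RTX;
}
#endif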
10273 /* Return cost of the memory address x.
10274 For i386, it is better to use a complex address than let gcc copy
10275 the address into a reg and make a new pseudo. But not if the address
10276 requires two regs - that would mean more pseudos with longer
10279 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10281 struct ix86_address parts;
10283 int ok = ix86_decompose_address (x, &parts);
10287 if (parts.base && GET_CODE (parts.base) == SUBREG)
10288 parts.base = SUBREG_REG (parts.base);
10289 if (parts.index && GET_CODE (parts.index) == SUBREG)
10290 parts.index = SUBREG_REG (parts.index);
10292 /* Attempt to minimize the number of registers in the address. */
10294 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10296 && (!REG_P (parts.index)
10297 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10301 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10303 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10304 && parts.base != parts.index)
10307 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10308 since its predecode logic can't detect the length of instructions
10309 and they degenerate to vector decoding. Increase the cost of such
10310 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10311 to split such addresses or even refuse them entirely.
10313 The following addressing modes are affected:
10318 The first and last cases may be avoidable by explicitly coding the zero
10319 into the memory address, but I don't have an AMD-K6 machine handy to check this
10323 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10324 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10325 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10331 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10332 this is used to form addresses to local data when -fPIC is in
10336 darwin_local_data_pic (rtx disp)
10338 return (GET_CODE (disp) == UNSPEC
10339 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10342 /* Determine if a given RTX is a valid constant. We already know this
10343 satisfies CONSTANT_P. */
10346 legitimate_constant_p (rtx x)
10348 switch (GET_CODE (x))
10353 if (GET_CODE (x) == PLUS)
10355 if (!CONST_INT_P (XEXP (x, 1)))
10360 if (TARGET_MACHO && darwin_local_data_pic (x))
10363 /* Only some unspecs are valid as "constants". */
10364 if (GET_CODE (x) == UNSPEC)
10365 switch (XINT (x, 1))
10368 case UNSPEC_GOTOFF:
10369 case UNSPEC_PLTOFF:
10370 return TARGET_64BIT;
10372 case UNSPEC_NTPOFF:
10373 x = XVECEXP (x, 0, 0);
10374 return (GET_CODE (x) == SYMBOL_REF
10375 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10376 case UNSPEC_DTPOFF:
10377 x = XVECEXP (x, 0, 0);
10378 return (GET_CODE (x) == SYMBOL_REF
10379 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10384 /* We must have drilled down to a symbol. */
10385 if (GET_CODE (x) == LABEL_REF)
10387 if (GET_CODE (x) != SYMBOL_REF)
10392 /* TLS symbols are never valid. */
10393 if (SYMBOL_REF_TLS_MODEL (x))
10396 /* DLLIMPORT symbols are never valid. */
10397 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10398 && SYMBOL_REF_DLLIMPORT_P (x))
10403 if (GET_MODE (x) == TImode
10404 && x != CONST0_RTX (TImode)
10410 if (!standard_sse_constant_p (x))
10417 /* Otherwise we handle everything else in the move patterns. */
10421 /* Determine if it's legal to put X into the constant pool. This
10422 is not possible for the address of thread-local symbols, which
10423 is checked above. */
10426 ix86_cannot_force_const_mem (rtx x)
10428 /* We can always put integral constants and vectors in memory. */
10429 switch (GET_CODE (x))
10439 return !legitimate_constant_p (x);
10443 /* Nonzero if the constant value X is a legitimate general operand
10444 when generating PIC code. It is given that flag_pic is on and
10445 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10448 legitimate_pic_operand_p (rtx x)
10452 switch (GET_CODE (x))
10455 inner = XEXP (x, 0);
10456 if (GET_CODE (inner) == PLUS
10457 && CONST_INT_P (XEXP (inner, 1)))
10458 inner = XEXP (inner, 0);
10460 /* Only some unspecs are valid as "constants". */
10461 if (GET_CODE (inner) == UNSPEC)
10462 switch (XINT (inner, 1))
10465 case UNSPEC_GOTOFF:
10466 case UNSPEC_PLTOFF:
10467 return TARGET_64BIT;
10469 x = XVECEXP (inner, 0, 0);
10470 return (GET_CODE (x) == SYMBOL_REF
10471 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10472 case UNSPEC_MACHOPIC_OFFSET:
10473 return legitimate_pic_address_disp_p (x);
10481 return legitimate_pic_address_disp_p (x);
10488 /* Determine if a given CONST RTX is a valid memory displacement
10492 legitimate_pic_address_disp_p (rtx disp)
10496 /* In 64bit mode we can allow direct addresses of symbols and labels
10497 when they are not dynamic symbols. */
10500 rtx op0 = disp, op1;
10502 switch (GET_CODE (disp))
10508 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10510 op0 = XEXP (XEXP (disp, 0), 0);
10511 op1 = XEXP (XEXP (disp, 0), 1);
10512 if (!CONST_INT_P (op1)
10513 || INTVAL (op1) >= 16*1024*1024
10514 || INTVAL (op1) < -16*1024*1024)
10516 if (GET_CODE (op0) == LABEL_REF)
10518 if (GET_CODE (op0) != SYMBOL_REF)
10523 /* TLS references should always be enclosed in UNSPEC. */
10524 if (SYMBOL_REF_TLS_MODEL (op0))
10526 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10527 && ix86_cmodel != CM_LARGE_PIC)
10535 if (GET_CODE (disp) != CONST)
10537 disp = XEXP (disp, 0);
10541 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10542 distance of GOT table references. We should not need these anyway. */
10543 if (GET_CODE (disp) != UNSPEC
10544 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10545 && XINT (disp, 1) != UNSPEC_GOTOFF
10546 && XINT (disp, 1) != UNSPEC_PLTOFF))
10549 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10550 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10556 if (GET_CODE (disp) == PLUS)
10558 if (!CONST_INT_P (XEXP (disp, 1)))
10560 disp = XEXP (disp, 0);
10564 if (TARGET_MACHO && darwin_local_data_pic (disp))
10567 if (GET_CODE (disp) != UNSPEC)
10570 switch (XINT (disp, 1))
10575 /* We need to check for both symbols and labels because VxWorks loads
10576 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10578 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10579 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10580 case UNSPEC_GOTOFF:
10581 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10582 While the ABI also specifies a 32bit relocation, we don't produce
10583 it in the small PIC model at all. */
10584 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10585 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10587 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10589 case UNSPEC_GOTTPOFF:
10590 case UNSPEC_GOTNTPOFF:
10591 case UNSPEC_INDNTPOFF:
10594 disp = XVECEXP (disp, 0, 0);
10595 return (GET_CODE (disp) == SYMBOL_REF
10596 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10597 case UNSPEC_NTPOFF:
10598 disp = XVECEXP (disp, 0, 0);
10599 return (GET_CODE (disp) == SYMBOL_REF
10600 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10601 case UNSPEC_DTPOFF:
10602 disp = XVECEXP (disp, 0, 0);
10603 return (GET_CODE (disp) == SYMBOL_REF
10604 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
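/* For reference, a sketch (not compiled; `symbol' is a placeholder
   SYMBOL_REF) of how the @GOTOFF displacement form accepted above is
   constructed elsewhere in this file by legitimize_pic_address:  */
#if 0
  rtx d = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_GOTOFF);
  d = gen_rtx_CONST (Pmode, d);   /* (const (unspec [symbol] UNSPEC_GOTOFF)) */
  d = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, d); /* PIC reg + symbol@GOTOFF */
#endif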
10610 /* Recognizes RTL expressions that are valid memory addresses for an
10611 instruction. The MODE argument is the machine mode for the MEM
10612 expression that wants to use this address.
10614 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10615 convert common non-canonical forms to canonical form so that they will
10619 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10620 rtx addr, bool strict)
10622 struct ix86_address parts;
10623 rtx base, index, disp;
10624 HOST_WIDE_INT scale;
10626 if (ix86_decompose_address (addr, &parts) <= 0)
10627 /* Decomposition failed. */
10631 index = parts.index;
10633 scale = parts.scale;
10635 /* Validate base register.
10637 Don't allow SUBREG's that span more than a word here. It can lead to spill
10638 failures when the base is one word out of a two word structure, which is
10639 represented internally as a DImode int. */
10647 else if (GET_CODE (base) == SUBREG
10648 && REG_P (SUBREG_REG (base))
10649 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10651 reg = SUBREG_REG (base);
10653 /* Base is not a register. */
10656 if (GET_MODE (base) != Pmode)
10657 /* Base is not in Pmode. */
10660 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10661 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10662 /* Base is not valid. */
10666 /* Validate index register.
10668 Don't allow SUBREG's that span more than a word here -- same as above. */
10676 else if (GET_CODE (index) == SUBREG
10677 && REG_P (SUBREG_REG (index))
10678 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10680 reg = SUBREG_REG (index);
10682 /* Index is not a register. */
10685 if (GET_MODE (index) != Pmode)
10686 /* Index is not in Pmode. */
10689 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10690 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10691 /* Index is not valid. */
10695 /* Validate scale factor. */
10699 /* Scale without index. */
10702 if (scale != 2 && scale != 4 && scale != 8)
10703 /* Scale is not a valid multiplier. */
10707 /* Validate displacement. */
10710 if (GET_CODE (disp) == CONST
10711 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10712 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10713 switch (XINT (XEXP (disp, 0), 1))
10715 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10716 used. While the ABI also specifies 32bit relocations, we don't
10717 produce them at all and use IP-relative addressing instead. */
10719 case UNSPEC_GOTOFF:
10720 gcc_assert (flag_pic);
10722 goto is_legitimate_pic;
10724 /* 64bit address unspec. */
10727 case UNSPEC_GOTPCREL:
10728 gcc_assert (flag_pic);
10729 goto is_legitimate_pic;
10731 case UNSPEC_GOTTPOFF:
10732 case UNSPEC_GOTNTPOFF:
10733 case UNSPEC_INDNTPOFF:
10734 case UNSPEC_NTPOFF:
10735 case UNSPEC_DTPOFF:
10739 /* Invalid address unspec. */
10743 else if (SYMBOLIC_CONST (disp)
10747 && MACHOPIC_INDIRECT
10748 && !machopic_operand_p (disp)
10754 if (TARGET_64BIT && (index || base))
10756 /* foo@dtpoff(%rX) is ok. */
10757 if (GET_CODE (disp) != CONST
10758 || GET_CODE (XEXP (disp, 0)) != PLUS
10759 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10760 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10761 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10762 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10763 /* Non-constant pic memory reference. */
10766 else if (! legitimate_pic_address_disp_p (disp))
10767 /* Displacement is an invalid pic construct. */
10770 /* This code used to verify that a symbolic pic displacement
10771 includes the pic_offset_table_rtx register.
10773 While this is a good idea, unfortunately these constructs may
10774 be created by the "adds using lea" optimization for incorrect
10783 This code is nonsensical, but results in addressing the
10784 GOT table with a pic_offset_table_rtx base. We can't
10785 just refuse it easily, since it gets matched by the
10786 "addsi3" pattern, which later gets split to an lea in the
10787 case the output register differs from the input. While this
10788 can be handled by a separate addsi pattern for this case
10789 that never results in an lea, disabling this test seems to be
10790 the easier and correct fix for the crash. */
10792 else if (GET_CODE (disp) != LABEL_REF
10793 && !CONST_INT_P (disp)
10794 && (GET_CODE (disp) != CONST
10795 || !legitimate_constant_p (disp))
10796 && (GET_CODE (disp) != SYMBOL_REF
10797 || !legitimate_constant_p (disp)))
10798 /* Displacement is not constant. */
10800 else if (TARGET_64BIT
10801 && !x86_64_immediate_operand (disp, VOIDmode))
10802 /* Displacement is out of range. */
10806 /* Everything looks valid. */
10810 /* Determine if a given RTX is a valid constant address. */
10813 constant_address_p (rtx x)
10815 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10818 /* Return a unique alias set for the GOT. */
10820 static alias_set_type
10821 ix86_GOT_alias_set (void)
10823 static alias_set_type set = -1;
10825 set = new_alias_set ();
10829 /* Return a legitimate reference for ORIG (an address) using the
10830 register REG. If REG is 0, a new pseudo is generated.
10832 There are two types of references that must be handled:
10834 1. Global data references must load the address from the GOT, via
10835 the PIC reg. An insn is emitted to do this load, and the reg is
10838 2. Static data references, constant pool addresses, and code labels
10839 compute the address as an offset from the GOT, whose base is in
10840 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10841 differentiate them from global data objects. The returned
10842 address is the PIC reg + an unspec constant.
10844 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10845 reg also appears in the address. */
10848 legitimize_pic_address (rtx orig, rtx reg)
10851 rtx new_rtx = orig;
10855 if (TARGET_MACHO && !TARGET_64BIT)
10858 reg = gen_reg_rtx (Pmode);
10859 /* Use the generic Mach-O PIC machinery. */
10860 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10864 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10866 else if (TARGET_64BIT
10867 && ix86_cmodel != CM_SMALL_PIC
10868 && gotoff_operand (addr, Pmode))
10871 /* This symbol may be referenced via a displacement from the PIC
10872 base address (@GOTOFF). */
10874 if (reload_in_progress)
10875 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10876 if (GET_CODE (addr) == CONST)
10877 addr = XEXP (addr, 0);
10878 if (GET_CODE (addr) == PLUS)
10880 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10882 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10885 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10886 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10888 tmpreg = gen_reg_rtx (Pmode);
10891 emit_move_insn (tmpreg, new_rtx);
10895 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10896 tmpreg, 1, OPTAB_DIRECT);
10899 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10901 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10903 /* This symbol may be referenced via a displacement from the PIC
10904 base address (@GOTOFF). */
10906 if (reload_in_progress)
10907 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10908 if (GET_CODE (addr) == CONST)
10909 addr = XEXP (addr, 0);
10910 if (GET_CODE (addr) == PLUS)
10912 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10914 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10917 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10918 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10919 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10923 emit_move_insn (reg, new_rtx);
10927 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10928 /* We can't use @GOTOFF for text labels on VxWorks;
10929 see gotoff_operand. */
10930 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10932 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10934 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10935 return legitimize_dllimport_symbol (addr, true);
10936 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10937 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10938 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10940 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10941 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10945 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10947 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10948 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10949 new_rtx = gen_const_mem (Pmode, new_rtx);
10950 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10953 reg = gen_reg_rtx (Pmode);
10954 /* Use gen_movsi directly, otherwise the address is loaded
10955 into a register for CSE. We don't want to CSE these addresses;
10956 instead we CSE addresses from the GOT table, so skip this. */
10957 emit_insn (gen_movsi (reg, new_rtx));
10962 /* This symbol must be referenced via a load from the
10963 Global Offset Table (@GOT). */
10965 if (reload_in_progress)
10966 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10967 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10968 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10970 new_rtx = force_reg (Pmode, new_rtx);
10971 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10972 new_rtx = gen_const_mem (Pmode, new_rtx);
10973 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10976 reg = gen_reg_rtx (Pmode);
10977 emit_move_insn (reg, new_rtx);
10983 if (CONST_INT_P (addr)
10984 && !x86_64_immediate_operand (addr, VOIDmode))
10988 emit_move_insn (reg, addr);
10992 new_rtx = force_reg (Pmode, addr);
10994 else if (GET_CODE (addr) == CONST)
10996 addr = XEXP (addr, 0);
10998 /* We must match stuff we generate before. Assume the only
10999 unspecs that can get here are ours. Not that we could do
11000 anything with them anyway.... */
11001 if (GET_CODE (addr) == UNSPEC
11002 || (GET_CODE (addr) == PLUS
11003 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11005 gcc_assert (GET_CODE (addr) == PLUS);
11007 if (GET_CODE (addr) == PLUS)
11009 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11011 /* Check first to see if this is a constant offset from a @GOTOFF
11012 symbol reference. */
11013 if (gotoff_operand (op0, Pmode)
11014 && CONST_INT_P (op1))
11018 if (reload_in_progress)
11019 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11020 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11022 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11023 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11024 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11028 emit_move_insn (reg, new_rtx);
11034 if (INTVAL (op1) < -16*1024*1024
11035 || INTVAL (op1) >= 16*1024*1024)
11037 if (!x86_64_immediate_operand (op1, Pmode))
11038 op1 = force_reg (Pmode, op1);
11039 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11045 base = legitimize_pic_address (XEXP (addr, 0), reg);
11046 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11047 base == reg ? NULL_RTX : reg);
11049 if (CONST_INT_P (new_rtx))
11050 new_rtx = plus_constant (base, INTVAL (new_rtx));
11053 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11055 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11056 new_rtx = XEXP (new_rtx, 1);
11058 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
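/* To make the main cases above concrete (an illustrative summary):
   under 32-bit -fPIC a global symbol is reached through a GOT load,
     (mem (plus (reg pic) (const (unspec [sym] UNSPEC_GOT)))),
   while a local symbol gets the cheaper displacement-only form,
     (plus (reg pic) (const (unspec [sym] UNSPEC_GOTOFF))).  */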
11066 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11069 get_thread_pointer (int to_reg)
11073 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11077 reg = gen_reg_rtx (Pmode);
11078 insn = gen_rtx_SET (VOIDmode, reg, tp);
11079 insn = emit_insn (insn);
11084 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11085 false if we expect this to be used for a memory address and true if
11086 we expect to load the address into a register. */
11089 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11091 rtx dest, base, off, pic, tp;
11096 case TLS_MODEL_GLOBAL_DYNAMIC:
11097 dest = gen_reg_rtx (Pmode);
11098 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11100 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11102 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11105 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11106 insns = get_insns ();
11109 RTL_CONST_CALL_P (insns) = 1;
11110 emit_libcall_block (insns, dest, rax, x);
11112 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11113 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11115 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11117 if (TARGET_GNU2_TLS)
11119 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11121 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11125 case TLS_MODEL_LOCAL_DYNAMIC:
11126 base = gen_reg_rtx (Pmode);
11127 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11129 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11131 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11134 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11135 insns = get_insns ();
11138 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11139 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11140 RTL_CONST_CALL_P (insns) = 1;
11141 emit_libcall_block (insns, base, rax, note);
11143 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11144 emit_insn (gen_tls_local_dynamic_base_64 (base));
11146 emit_insn (gen_tls_local_dynamic_base_32 (base));
11148 if (TARGET_GNU2_TLS)
11150 rtx x = ix86_tls_module_base ();
11152 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11153 gen_rtx_MINUS (Pmode, x, tp));
11156 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11157 off = gen_rtx_CONST (Pmode, off);
11159 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11161 if (TARGET_GNU2_TLS)
11163 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11165 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11170 case TLS_MODEL_INITIAL_EXEC:
11174 type = UNSPEC_GOTNTPOFF;
11178 if (reload_in_progress)
11179 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11180 pic = pic_offset_table_rtx;
11181 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11183 else if (!TARGET_ANY_GNU_TLS)
11185 pic = gen_reg_rtx (Pmode);
11186 emit_insn (gen_set_got (pic));
11187 type = UNSPEC_GOTTPOFF;
11192 type = UNSPEC_INDNTPOFF;
11195 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11196 off = gen_rtx_CONST (Pmode, off);
11198 off = gen_rtx_PLUS (Pmode, pic, off);
11199 off = gen_const_mem (Pmode, off);
11200 set_mem_alias_set (off, ix86_GOT_alias_set ());
11202 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11204 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11205 off = force_reg (Pmode, off);
11206 return gen_rtx_PLUS (Pmode, base, off);
11210 base = get_thread_pointer (true);
11211 dest = gen_reg_rtx (Pmode);
11212 emit_insn (gen_subsi3 (dest, base, off));
11216 case TLS_MODEL_LOCAL_EXEC:
11217 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11218 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11219 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11220 off = gen_rtx_CONST (Pmode, off);
11222 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11224 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11225 return gen_rtx_PLUS (Pmode, base, off);
11229 base = get_thread_pointer (true);
11230 dest = gen_reg_rtx (Pmode);
11231 emit_insn (gen_subsi3 (dest, base, off));
11236 gcc_unreachable ();
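/* An illustrative sketch of the local-exec result above, assuming a
   64-bit GNU target (the exact assembly depends on the assembler):
   the returned (plus (reg tp) (const (unspec [sym] UNSPEC_NTPOFF)))
   typically prints as an %fs-relative access such as
   "movq %fs:sym@tpoff, %rax".  */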
11242 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11245 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11246 htab_t dllimport_map;
11249 get_dllimport_decl (tree decl)
11251 struct tree_map *h, in;
11254 const char *prefix;
11255 size_t namelen, prefixlen;
11260 if (!dllimport_map)
11261 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11263 in.hash = htab_hash_pointer (decl);
11264 in.base.from = decl;
11265 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11266 h = (struct tree_map *) *loc;
11270 *loc = h = ggc_alloc_tree_map ();
11272 h->base.from = decl;
11273 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11274 VAR_DECL, NULL, ptr_type_node);
11275 DECL_ARTIFICIAL (to) = 1;
11276 DECL_IGNORED_P (to) = 1;
11277 DECL_EXTERNAL (to) = 1;
11278 TREE_READONLY (to) = 1;
11280 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11281 name = targetm.strip_name_encoding (name);
11282 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11283 ? "*__imp_" : "*__imp__";
11284 namelen = strlen (name);
11285 prefixlen = strlen (prefix);
11286 imp_name = (char *) alloca (namelen + prefixlen + 1);
11287 memcpy (imp_name, prefix, prefixlen);
11288 memcpy (imp_name + prefixlen, name, namelen + 1);
11290 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11291 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11292 SET_SYMBOL_REF_DECL (rtl, to);
11293 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11295 rtl = gen_const_mem (Pmode, rtl);
11296 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11298 SET_DECL_RTL (to, rtl);
11299 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11304 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11305 true if we require the result be a register. */
11308 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11313 gcc_assert (SYMBOL_REF_DECL (symbol));
11314 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11316 x = DECL_RTL (imp_decl);
11318 x = force_reg (Pmode, x);
11322 /* Try machine-dependent ways of modifying an illegitimate address
11323 to be legitimate. If we find one, return the new, valid address.
11324 This macro is used in only one place: `memory_address' in explow.c.
11326 OLDX is the address as it was before break_out_memory_refs was called.
11327 In some cases it is useful to look at this to decide what needs to be done.
11329 It is always safe for this macro to do nothing. It exists to recognize
11330 opportunities to optimize the output.
11332 For the 80386, we handle X+REG by loading X into a register R and
11333 using R+REG. R will go in a general reg and indexing will be used.
11334 However, if REG is a broken-out memory address or multiplication,
11335 nothing needs to be done because REG can certainly go in a general reg.
11337 When -fpic is used, special handling is needed for symbolic references.
11338 See comments by legitimize_pic_address in i386.c for details. */
11341 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11342 enum machine_mode mode)
11347 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11349 return legitimize_tls_address (x, (enum tls_model) log, false);
11350 if (GET_CODE (x) == CONST
11351 && GET_CODE (XEXP (x, 0)) == PLUS
11352 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11353 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11355 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11356 (enum tls_model) log, false);
11357 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11360 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11362 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11363 return legitimize_dllimport_symbol (x, true);
11364 if (GET_CODE (x) == CONST
11365 && GET_CODE (XEXP (x, 0)) == PLUS
11366 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11367 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11369 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11370 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11374 if (flag_pic && SYMBOLIC_CONST (x))
11375 return legitimize_pic_address (x, 0);
11377 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11378 if (GET_CODE (x) == ASHIFT
11379 && CONST_INT_P (XEXP (x, 1))
11380 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11383 log = INTVAL (XEXP (x, 1));
11384 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11385 GEN_INT (1 << log));
11388 if (GET_CODE (x) == PLUS)
11390 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11392 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11393 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11394 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11397 log = INTVAL (XEXP (XEXP (x, 0), 1));
11398 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11399 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11400 GEN_INT (1 << log));
11403 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11404 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11405 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11408 log = INTVAL (XEXP (XEXP (x, 1), 1));
11409 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11410 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11411 GEN_INT (1 << log));
11414 /* Put multiply first if it isn't already. */
11415 if (GET_CODE (XEXP (x, 1)) == MULT)
11417 rtx tmp = XEXP (x, 0);
11418 XEXP (x, 0) = XEXP (x, 1);
11423 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11424 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11425 created by virtual register instantiation, register elimination, and
11426 similar optimizations. */
11427 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11430 x = gen_rtx_PLUS (Pmode,
11431 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11432 XEXP (XEXP (x, 1), 0)),
11433 XEXP (XEXP (x, 1), 1));
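/* For example, (plus (mult (reg %eax) (const_int 4))
   (plus (reg %ebp) (const_int 8))) is rewritten as
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebp)) (const_int 8)),
   i.e. the directly encodable 8(%ebp,%eax,4).  */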
11437 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11438 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11439 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11440 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11441 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11442 && CONSTANT_P (XEXP (x, 1)))
11445 rtx other = NULL_RTX;
11447 if (CONST_INT_P (XEXP (x, 1)))
11449 constant = XEXP (x, 1);
11450 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11452 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11454 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11455 other = XEXP (x, 1);
11463 x = gen_rtx_PLUS (Pmode,
11464 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11465 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11466 plus_constant (other, INTVAL (constant)));
11470 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11473 if (GET_CODE (XEXP (x, 0)) == MULT)
11476 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11479 if (GET_CODE (XEXP (x, 1)) == MULT)
11482 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11486 && REG_P (XEXP (x, 1))
11487 && REG_P (XEXP (x, 0)))
11490 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11493 x = legitimize_pic_address (x, 0);
11496 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11499 if (REG_P (XEXP (x, 0)))
11501 rtx temp = gen_reg_rtx (Pmode);
11502 rtx val = force_operand (XEXP (x, 1), temp);
11504 emit_move_insn (temp, val);
11506 XEXP (x, 1) = temp;
11510 else if (REG_P (XEXP (x, 1)))
11512 rtx temp = gen_reg_rtx (Pmode);
11513 rtx val = force_operand (XEXP (x, 0), temp);
11515 emit_move_insn (temp, val);
11517 XEXP (x, 0) = temp;
11525 /* Print an integer constant expression in assembler syntax. Addition
11526 and subtraction are the only arithmetic that may appear in these
11527 expressions. FILE is the stdio stream to write to, X is the rtx, and
11528 CODE is the operand print code from the output string. */
11531 output_pic_addr_const (FILE *file, rtx x, int code)
11535 switch (GET_CODE (x))
11538 gcc_assert (flag_pic);
11543 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11544 output_addr_const (file, x);
11547 const char *name = XSTR (x, 0);
11549 /* Mark the decl as referenced so that cgraph will
11550 output the function. */
11551 if (SYMBOL_REF_DECL (x))
11552 mark_decl_referenced (SYMBOL_REF_DECL (x));
11555 if (MACHOPIC_INDIRECT
11556 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11557 name = machopic_indirection_name (x, /*stub_p=*/true);
11559 assemble_name (file, name);
11561 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11562 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11563 fputs ("@PLT", file);
11570 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11571 assemble_name (asm_out_file, buf);
11575 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11579 /* This used to output parentheses around the expression,
11580 but that does not work on the 386 (either ATT or BSD assembler). */
11581 output_pic_addr_const (file, XEXP (x, 0), code);
11585 if (GET_MODE (x) == VOIDmode)
11587 /* We can use %d if the number is <32 bits and positive. */
11588 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11589 fprintf (file, "0x%lx%08lx",
11590 (unsigned long) CONST_DOUBLE_HIGH (x),
11591 (unsigned long) CONST_DOUBLE_LOW (x));
11593 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11596 /* We can't handle floating point constants;
11597 TARGET_PRINT_OPERAND must handle them. */
11598 output_operand_lossage ("floating constant misused");
11602 /* Some assemblers need integer constants to appear first. */
11603 if (CONST_INT_P (XEXP (x, 0)))
11605 output_pic_addr_const (file, XEXP (x, 0), code);
11607 output_pic_addr_const (file, XEXP (x, 1), code);
11611 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11612 output_pic_addr_const (file, XEXP (x, 1), code);
11614 output_pic_addr_const (file, XEXP (x, 0), code);
11620 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11621 output_pic_addr_const (file, XEXP (x, 0), code);
11623 output_pic_addr_const (file, XEXP (x, 1), code);
11625 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11629 gcc_assert (XVECLEN (x, 0) == 1);
11630 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11631 switch (XINT (x, 1))
11634 fputs ("@GOT", file);
11636 case UNSPEC_GOTOFF:
11637 fputs ("@GOTOFF", file);
11639 case UNSPEC_PLTOFF:
11640 fputs ("@PLTOFF", file);
11642 case UNSPEC_GOTPCREL:
11643 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11644 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11646 case UNSPEC_GOTTPOFF:
11647 /* FIXME: This might be @TPOFF in Sun ld too. */
11648 fputs ("@gottpoff", file);
11651 fputs ("@tpoff", file);
11653 case UNSPEC_NTPOFF:
11655 fputs ("@tpoff", file);
11657 fputs ("@ntpoff", file);
11659 case UNSPEC_DTPOFF:
11660 fputs ("@dtpoff", file);
11662 case UNSPEC_GOTNTPOFF:
11664 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11665 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11667 fputs ("@gotntpoff", file);
11669 case UNSPEC_INDNTPOFF:
11670 fputs ("@indntpoff", file);
11673 case UNSPEC_MACHOPIC_OFFSET:
11675 machopic_output_function_base_name (file);
11679 output_operand_lossage ("invalid UNSPEC as operand");
11685 output_operand_lossage ("invalid expression as operand");
11689 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11690 We need to emit DTP-relative relocations. */
11692 static void ATTRIBUTE_UNUSED
11693 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11695 fputs (ASM_LONG, file);
11696 output_addr_const (file, x);
11697 fputs ("@dtpoff", file);
11703 fputs (", 0", file);
11706 gcc_unreachable ();
11710 /* Return true if X is a representation of the PIC register. This copes
11711 with calls from ix86_find_base_term, where the register might have
11712 been replaced by a cselib value. */
11715 ix86_pic_register_p (rtx x)
11717 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11718 return (pic_offset_table_rtx
11719 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11721 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11724 /* In the name of slightly smaller debug output, and to cater to
11725 general assembler lossage, recognize PIC+GOTOFF and turn it back
11726 into a direct symbol reference.
11728 On Darwin, this is necessary to avoid a crash, because Darwin
11729 has a different PIC label for each routine but the DWARF debugging
11730 information is not associated with any particular routine, so it's
11731 necessary to remove references to the PIC label from RTL stored by
11732 the DWARF output code. */
11735 ix86_delegitimize_address (rtx x)
11737 rtx orig_x = delegitimize_mem_from_attrs (x);
11738 /* addend is NULL or some rtx if x is something+GOTOFF where
11739 something doesn't include the PIC register. */
11740 rtx addend = NULL_RTX;
11741 /* reg_addend is NULL or a multiple of some register. */
11742 rtx reg_addend = NULL_RTX;
11743 /* const_addend is NULL or a const_int. */
11744 rtx const_addend = NULL_RTX;
11745 /* This is the result, or NULL. */
11746 rtx result = NULL_RTX;
11755 if (GET_CODE (x) != CONST
11756 || GET_CODE (XEXP (x, 0)) != UNSPEC
11757 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11758 || !MEM_P (orig_x))
11760 x = XVECEXP (XEXP (x, 0), 0, 0);
11761 if (GET_MODE (orig_x) != Pmode)
11762 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11766 if (GET_CODE (x) != PLUS
11767 || GET_CODE (XEXP (x, 1)) != CONST)
11770 if (ix86_pic_register_p (XEXP (x, 0)))
11771 /* %ebx + GOT/GOTOFF */
11773 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11775 /* %ebx + %reg * scale + GOT/GOTOFF */
11776 reg_addend = XEXP (x, 0);
11777 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11778 reg_addend = XEXP (reg_addend, 1);
11779 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11780 reg_addend = XEXP (reg_addend, 0);
11783 reg_addend = NULL_RTX;
11784 addend = XEXP (x, 0);
11788 addend = XEXP (x, 0);
11790 x = XEXP (XEXP (x, 1), 0);
11791 if (GET_CODE (x) == PLUS
11792 && CONST_INT_P (XEXP (x, 1)))
11794 const_addend = XEXP (x, 1);
11798 if (GET_CODE (x) == UNSPEC
11799 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11800 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11801 result = XVECEXP (x, 0, 0);
11803 if (TARGET_MACHO && darwin_local_data_pic (x)
11804 && !MEM_P (orig_x))
11805 result = XVECEXP (x, 0, 0);
11811 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11813 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11816 /* If the rest of the original X doesn't involve the PIC register, add
11817 addend and subtract pic_offset_table_rtx. This can happen e.g.
11819 leal (%ebx, %ecx, 4), %ecx
11821 movl foo@GOTOFF(%ecx), %edx
11822 in which case we return (%ecx - %ebx) + foo. */
11823 if (pic_offset_table_rtx)
11824 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11825 pic_offset_table_rtx),
11830 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11831 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11835 /* If X is a machine specific address (i.e. a symbol or label being
11836 referenced as a displacement from the GOT implemented using an
11837 UNSPEC), then return the base term. Otherwise return X. */
11840 ix86_find_base_term (rtx x)
11846 if (GET_CODE (x) != CONST)
11848 term = XEXP (x, 0);
11849 if (GET_CODE (term) == PLUS
11850 && (CONST_INT_P (XEXP (term, 1))
11851 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11852 term = XEXP (term, 0);
11853 if (GET_CODE (term) != UNSPEC
11854 || XINT (term, 1) != UNSPEC_GOTPCREL)
11857 return XVECEXP (term, 0, 0);
11860 return ix86_delegitimize_address (x);
11864 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11865 int fp, FILE *file)
11867 const char *suffix;
11869 if (mode == CCFPmode || mode == CCFPUmode)
11871 code = ix86_fp_compare_code_to_integer (code);
11875 code = reverse_condition (code);
11926 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11930 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11931 Those same assemblers have the same but opposite lossage on cmov. */
11932 if (mode == CCmode)
11933 suffix = fp ? "nbe" : "a";
11934 else if (mode == CCCmode)
11937 gcc_unreachable ();
11953 gcc_unreachable ();
11957 gcc_assert (mode == CCmode || mode == CCCmode);
11974 gcc_unreachable ();
11978 /* ??? As above. */
11979 gcc_assert (mode == CCmode || mode == CCCmode);
11980 suffix = fp ? "nb" : "ae";
11983 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11987 /* ??? As above. */
11988 if (mode == CCmode)
11990 else if (mode == CCCmode)
11991 suffix = fp ? "nb" : "ae";
11993 gcc_unreachable ();
11996 suffix = fp ? "u" : "p";
11999 suffix = fp ? "nu" : "np";
12002 gcc_unreachable ();
12004 fputs (suffix, file);
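/* Illustrative example: (gt, CCGCmode) emits the suffix "g", as in
   "setg" or "cmovg"; with REVERSE set, the condition is first flipped
   to LE and "le" is emitted instead.  */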
12007 /* Print the name of register X to FILE based on its machine mode and number.
12008 If CODE is 'w', pretend the mode is HImode.
12009 If CODE is 'b', pretend the mode is QImode.
12010 If CODE is 'k', pretend the mode is SImode.
12011 If CODE is 'q', pretend the mode is DImode.
12012 If CODE is 'x', pretend the mode is V4SFmode.
12013 If CODE is 't', pretend the mode is V8SFmode.
12014 If CODE is 'h', pretend the reg is the 'high' byte register.
12015 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12016 If CODE is 'd', duplicate the operand for AVX instruction.
12020 print_reg (rtx x, int code, FILE *file)
12023 bool duplicated = code == 'd' && TARGET_AVX;
12025 gcc_assert (x == pc_rtx
12026 || (REGNO (x) != ARG_POINTER_REGNUM
12027 && REGNO (x) != FRAME_POINTER_REGNUM
12028 && REGNO (x) != FLAGS_REG
12029 && REGNO (x) != FPSR_REG
12030 && REGNO (x) != FPCR_REG));
12032 if (ASSEMBLER_DIALECT == ASM_ATT)
12037 gcc_assert (TARGET_64BIT);
12038 fputs ("rip", file);
12042 if (code == 'w' || MMX_REG_P (x))
12044 else if (code == 'b')
12046 else if (code == 'k')
12048 else if (code == 'q')
12050 else if (code == 'y')
12052 else if (code == 'h')
12054 else if (code == 'x')
12056 else if (code == 't')
12059 code = GET_MODE_SIZE (GET_MODE (x));
12061 /* Irritatingly, the AMD extended registers use a different naming
12062 convention from the normal registers. */
12063 if (REX_INT_REG_P (x))
12065 gcc_assert (TARGET_64BIT);
12069 error ("extended registers have no high halves");
12072 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12075 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12078 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12081 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12084 error ("unsupported operand size for extended register");
12094 if (STACK_TOP_P (x))
12103 if (! ANY_FP_REG_P (x))
12104 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12109 reg = hi_reg_name[REGNO (x)];
12112 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12114 reg = qi_reg_name[REGNO (x)];
12117 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12119 reg = qi_high_reg_name[REGNO (x)];
12124 gcc_assert (!duplicated);
12126 fputs (hi_reg_name[REGNO (x)] + 1, file);
12131 gcc_unreachable ();
12137 if (ASSEMBLER_DIALECT == ASM_ATT)
12138 fprintf (file, ", %%%s", reg);
12140 fprintf (file, ", %s", reg);
12144 /* Locate some local-dynamic symbol still in use by this function
12145 so that we can print its name in some tls_local_dynamic_base
12149 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12153 if (GET_CODE (x) == SYMBOL_REF
12154 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12156 cfun->machine->some_ld_name = XSTR (x, 0);
12163 static const char *
12164 get_some_local_dynamic_name (void)
12168 if (cfun->machine->some_ld_name)
12169 return cfun->machine->some_ld_name;
12171 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12172 if (NONDEBUG_INSN_P (insn)
12173 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12174 return cfun->machine->some_ld_name;
12179 /* Meaning of CODE:
12180 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12181 C -- print opcode suffix for set/cmov insn.
12182 c -- like C, but print reversed condition
12183 F,f -- likewise, but for floating-point.
12184 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12186 R -- print the prefix for register names.
12187 z -- print the opcode suffix for the size of the current operand.
12188 Z -- likewise, with special suffixes for x87 instructions.
12189 * -- print a star (in certain assembler syntax)
12190 A -- print an absolute memory reference.
12191 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12192 s -- print a shift double count, followed by the assembler's argument
12194 b -- print the QImode name of the register for the indicated operand.
12195 %b0 would print %al if operands[0] is reg 0.
12196 w -- likewise, print the HImode name of the register.
12197 k -- likewise, print the SImode name of the register.
12198 q -- likewise, print the DImode name of the register.
12199 x -- likewise, print the V4SFmode name of the register.
12200 t -- likewise, print the V8SFmode name of the register.
12201 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12202 y -- print "st(0)" instead of "st" as a register.
12203 d -- print duplicated register operand for AVX instruction.
12204 D -- print condition for SSE cmp instruction.
12205 P -- if PIC, print an @PLT suffix.
12206 X -- don't print any sort of PIC '@' suffix for a symbol.
12207 & -- print some in-use local-dynamic symbol name.
12208 H -- print a memory address offset by 8; used for sse high-parts
12209 Y -- print condition for XOP pcom* instruction.
12210 + -- print a branch hint as 'cs' or 'ds' prefix
12211 ; -- print a semicolon (after prefixes due to a bug in older gas).
12215 ix86_print_operand (FILE *file, rtx x, int code)
12222 if (ASSEMBLER_DIALECT == ASM_ATT)
12228 const char *name = get_some_local_dynamic_name ();
12230 output_operand_lossage ("'%%&' used without any "
12231 "local dynamic TLS references");
12233 assemble_name (file, name);
12238 switch (ASSEMBLER_DIALECT)
12245 /* Intel syntax. For absolute addresses, registers should not
12246 be surrounded by braces. */
12250 ix86_print_operand (file, x, 0);
12257 gcc_unreachable ();
12260 ix86_print_operand (file, x, 0);
12265 if (ASSEMBLER_DIALECT == ASM_ATT)
12270 if (ASSEMBLER_DIALECT == ASM_ATT)
12275 if (ASSEMBLER_DIALECT == ASM_ATT)
12280 if (ASSEMBLER_DIALECT == ASM_ATT)
12285 if (ASSEMBLER_DIALECT == ASM_ATT)
12290 if (ASSEMBLER_DIALECT == ASM_ATT)
12295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12297 /* Opcodes don't get size suffixes if using Intel opcodes. */
12298 if (ASSEMBLER_DIALECT == ASM_INTEL)
12301 switch (GET_MODE_SIZE (GET_MODE (x)))
12320 output_operand_lossage
12321 ("invalid operand size for operand code '%c'", code);
12326 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12328 (0, "non-integer operand used with operand code '%c'", code);
12332 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12333 if (ASSEMBLER_DIALECT == ASM_INTEL)
12336 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12338 switch (GET_MODE_SIZE (GET_MODE (x)))
12341 #ifdef HAVE_AS_IX86_FILDS
12351 #ifdef HAVE_AS_IX86_FILDQ
12354 fputs ("ll", file);
12362 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12364 /* 387 opcodes don't get size suffixes
12365 if the operands are registers. */
12366 if (STACK_REG_P (x))
12369 switch (GET_MODE_SIZE (GET_MODE (x)))
12390 output_operand_lossage
12391 ("invalid operand type used with operand code '%c'", code);
12395 output_operand_lossage
12396 ("invalid operand size for operand code '%c'", code);
12413 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12415 ix86_print_operand (file, x, 0);
12416 fputs (", ", file);
12421 /* A little bit of braindamage here. The SSE compare instructions
12422 use completely different names for the comparisons than the
12423 fp conditional moves do. */
12426 switch (GET_CODE (x))
12429 fputs ("eq", file);
12432 fputs ("eq_us", file);
12435 fputs ("lt", file);
12438 fputs ("nge", file);
12441 fputs ("le", file);
12444 fputs ("ngt", file);
12447 fputs ("unord", file);
12450 fputs ("neq", file);
12453 fputs ("neq_oq", file);
12456 fputs ("ge", file);
12459 fputs ("nlt", file);
12462 fputs ("gt", file);
12465 fputs ("nle", file);
12468 fputs ("ord", file);
12471 output_operand_lossage ("operand is not a condition code, "
12472 "invalid operand code 'D'");
12478 switch (GET_CODE (x))
12482 fputs ("eq", file);
12486 fputs ("lt", file);
12490 fputs ("le", file);
12493 fputs ("unord", file);
12497 fputs ("neq", file);
12501 fputs ("nlt", file);
12505 fputs ("nle", file);
12508 fputs ("ord", file);
12511 output_operand_lossage ("operand is not a condition code, "
12512 "invalid operand code 'D'");
12518 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12519 if (ASSEMBLER_DIALECT == ASM_ATT)
12521 switch (GET_MODE (x))
12523 case HImode: putc ('w', file); break;
12525 case SFmode: putc ('l', file); break;
12527 case DFmode: putc ('q', file); break;
12528 default: gcc_unreachable ();
12535 if (!COMPARISON_P (x))
12537 output_operand_lossage ("operand is neither a constant nor a "
12538 "condition code, invalid operand code "
12542 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12545 if (!COMPARISON_P (x))
12547 output_operand_lossage ("operand is neither a constant nor a "
12548 "condition code, invalid operand code "
12552 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12553 if (ASSEMBLER_DIALECT == ASM_ATT)
12556 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12559 /* Like above, but reverse condition */
12561 /* Check to see if argument to %c is really a constant
12562 and not a condition code which needs to be reversed. */
12563 if (!COMPARISON_P (x))
12565 output_operand_lossage ("operand is neither a constant nor a "
12566 "condition code, invalid operand "
12570 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12573 if (!COMPARISON_P (x))
12575 output_operand_lossage ("operand is neither a constant nor a "
12576 "condition code, invalid operand "
12580 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12581 if (ASSEMBLER_DIALECT == ASM_ATT)
12584 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12588 /* It doesn't actually matter what mode we use here, as we're
12589 only going to use this for printing. */
12590 x = adjust_address_nv (x, DImode, 8);
12598 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12601 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12604 int pred_val = INTVAL (XEXP (x, 0));
12606 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12607 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12609 int taken = pred_val > REG_BR_PROB_BASE / 2;
12610 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12612 /* Emit hints only where the default branch prediction
12613 heuristics would fail. */
12614 if (taken != cputaken)
12616 /* We use 3e (DS) prefix for taken branches and
12617 2e (CS) prefix for not taken branches. */
12619 fputs ("ds ; ", file);
12621 fputs ("cs ; ", file);
12629 switch (GET_CODE (x))
12632 fputs ("neq", file);
12635 fputs ("eq", file);
12639 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12643 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12647 fputs ("le", file);
12651 fputs ("lt", file);
12654 fputs ("unord", file);
12657 fputs ("ord", file);
12660 fputs ("ueq", file);
12663 fputs ("nlt", file);
12666 fputs ("nle", file);
12669 fputs ("ule", file);
12672 fputs ("ult", file);
12675 fputs ("une", file);
12678 output_operand_lossage ("operand is not a condition code, "
12679 "invalid operand code 'Y'");
12685 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12691 output_operand_lossage ("invalid operand code '%c'", code);
12696 print_reg (x, code, file);
12698 else if (MEM_P (x))
12700 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12701 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12702 && GET_MODE (x) != BLKmode)
12705 switch (GET_MODE_SIZE (GET_MODE (x)))
12707 case 1: size = "BYTE"; break;
12708 case 2: size = "WORD"; break;
12709 case 4: size = "DWORD"; break;
12710 case 8: size = "QWORD"; break;
12711 case 12: size = "TBYTE"; break;
12713 if (GET_MODE (x) == XFmode)
12718 case 32: size = "YMMWORD"; break;
12720 gcc_unreachable ();
12723 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12726 else if (code == 'w')
12728 else if (code == 'k')
12731 fputs (size, file);
12732 fputs (" PTR ", file);
12736 /* Avoid (%rip) for call operands. */
12737 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12738 && !CONST_INT_P (x))
12739 output_addr_const (file, x);
12740 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12741 output_operand_lossage ("invalid constraints for operand");
12743 output_address (x);
12746 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12751 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12752 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12754 if (ASSEMBLER_DIALECT == ASM_ATT)
12756 fprintf (file, "0x%08lx", (long unsigned int) l);
12759 /* These float cases don't actually occur as immediate operands. */
12760 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12764 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12765 fputs (dstr, file);
12768 else if (GET_CODE (x) == CONST_DOUBLE
12769 && GET_MODE (x) == XFmode)
12773 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12774 fputs (dstr, file);
12779 /* We have patterns that allow zero sets of memory, for instance.
12780 In 64-bit mode, we should probably support all 8-byte vectors,
12781 since we can in fact encode that into an immediate. */
12782 if (GET_CODE (x) == CONST_VECTOR)
12784 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12790 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12792 if (ASSEMBLER_DIALECT == ASM_ATT)
12795 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12796 || GET_CODE (x) == LABEL_REF)
12798 if (ASSEMBLER_DIALECT == ASM_ATT)
12801 fputs ("OFFSET FLAT:", file);
12804 if (CONST_INT_P (x))
12805 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12807 output_pic_addr_const (file, x, code);
12809 output_addr_const (file, x);
12814 ix86_print_operand_punct_valid_p (unsigned char code)
12816 return (code == '*' || code == '+' || code == '&' || code == ';');
12819 /* Print a memory operand whose address is ADDR. */
12822 ix86_print_operand_address (FILE *file, rtx addr)
12824 struct ix86_address parts;
12825 rtx base, index, disp;
12827 int ok = ix86_decompose_address (addr, &parts);
12832 index = parts.index;
12834 scale = parts.scale;
12842 if (ASSEMBLER_DIALECT == ASM_ATT)
12844 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12847 gcc_unreachable ();
12850 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
12851 if (TARGET_64BIT && !base && !index)
12855 if (GET_CODE (disp) == CONST
12856 && GET_CODE (XEXP (disp, 0)) == PLUS
12857 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12858 symbol = XEXP (XEXP (disp, 0), 0);
12860 if (GET_CODE (symbol) == LABEL_REF
12861 || (GET_CODE (symbol) == SYMBOL_REF
12862 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12865 if (!base && !index)
12867 /* A displacement-only address requires special attention. */
12869 if (CONST_INT_P (disp))
12871 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12872 fputs ("ds:", file);
12873 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12876 output_pic_addr_const (file, disp, 0);
12878 output_addr_const (file, disp);
12882 if (ASSEMBLER_DIALECT == ASM_ATT)
12887 output_pic_addr_const (file, disp, 0);
12888 else if (GET_CODE (disp) == LABEL_REF)
12889 output_asm_label (disp);
12891 output_addr_const (file, disp);
12896 print_reg (base, 0, file);
12900 print_reg (index, 0, file);
12902 fprintf (file, ",%d", scale);
12908 rtx offset = NULL_RTX;
12912 /* Pull out the offset of a symbol; print any symbol itself. */
12913 if (GET_CODE (disp) == CONST
12914 && GET_CODE (XEXP (disp, 0)) == PLUS
12915 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12917 offset = XEXP (XEXP (disp, 0), 1);
12918 disp = gen_rtx_CONST (VOIDmode,
12919 XEXP (XEXP (disp, 0), 0));
12923 output_pic_addr_const (file, disp, 0);
12924 else if (GET_CODE (disp) == LABEL_REF)
12925 output_asm_label (disp);
12926 else if (CONST_INT_P (disp))
12929 output_addr_const (file, disp);
12935 print_reg (base, 0, file);
12938 if (INTVAL (offset) >= 0)
12940 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12944 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12951 print_reg (index, 0, file);
12953 fprintf (file, "*%d", scale);
12961 output_addr_const_extra (FILE *file, rtx x)
12965 if (GET_CODE (x) != UNSPEC)
12968 op = XVECEXP (x, 0, 0);
12969 switch (XINT (x, 1))
12971 case UNSPEC_GOTTPOFF:
12972 output_addr_const (file, op);
12973 /* FIXME: This might be @TPOFF in Sun ld. */
12974 fputs ("@gottpoff", file);
12977 output_addr_const (file, op);
12978 fputs ("@tpoff", file);
12980 case UNSPEC_NTPOFF:
12981 output_addr_const (file, op);
12983 fputs ("@tpoff", file);
12985 fputs ("@ntpoff", file);
12987 case UNSPEC_DTPOFF:
12988 output_addr_const (file, op);
12989 fputs ("@dtpoff", file);
12991 case UNSPEC_GOTNTPOFF:
12992 output_addr_const (file, op);
12994 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12995 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12997 fputs ("@gotntpoff", file);
12999 case UNSPEC_INDNTPOFF:
13000 output_addr_const (file, op);
13001 fputs ("@indntpoff", file);
13004 case UNSPEC_MACHOPIC_OFFSET:
13005 output_addr_const (file, op);
13007 machopic_output_function_base_name (file);
13018 /* Split one or more DImode RTL references into pairs of SImode
13019 references. The RTL can be REG, offsettable MEM, integer constant, or
13020 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13021 split and "num" is its length. lo_half and hi_half are output arrays
13022 that parallel "operands". */
13025 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13029 rtx op = operands[num];
13031 /* simplify_subreg refuses to split volatile memory addresses,
13032 but we still have to handle them. */
13035 lo_half[num] = adjust_address (op, SImode, 0);
13036 hi_half[num] = adjust_address (op, SImode, 4);
13040 lo_half[num] = simplify_gen_subreg (SImode, op,
13041 GET_MODE (op) == VOIDmode
13042 ? DImode : GET_MODE (op), 0);
13043 hi_half[num] = simplify_gen_subreg (SImode, op,
13044 GET_MODE (op) == VOIDmode
13045 ? DImode : GET_MODE (op), 4);
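/* A minimal usage sketch (not part of GCC; the wrapper function is
   hypothetical, while split_di and emit_move_insn are real): a
   post-reload splitter can lower a DImode move into two SImode moves
   like this.  */
static void
split_di_move_sketch (rtx operands[2])
{
  rtx lo[2], hi[2];

  split_di (operands, 2, lo, hi);	/* split both dest and src */
  emit_move_insn (lo[0], lo[1]);	/* move the low 32 bits */
  emit_move_insn (hi[0], hi[1]);	/* move the high 32 bits */
}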
13049 /* Split one or more TImode RTL references into pairs of DImode
13050 references. The RTL can be REG, offsettable MEM, integer constant, or
13051 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13052 split and "num" is its length. lo_half and hi_half are output arrays
13053 that parallel "operands". */
13056 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13060 rtx op = operands[num];
13062 /* simplify_subreg refuses to split volatile memory addresses, but we
13063 still have to handle them. */
13066 lo_half[num] = adjust_address (op, DImode, 0);
13067 hi_half[num] = adjust_address (op, DImode, 8);
13071 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13072 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13077 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13078 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13079 is the expression of the binary operation. The output may either be
13080 emitted here, or returned to the caller, like all output_* functions.
13082 There is no guarantee that the operands are the same mode, as they
13083 might be within FLOAT or FLOAT_EXTEND expressions. */
13085 #ifndef SYSV386_COMPAT
13086 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13087 wants to fix the assemblers because that causes incompatibility
13088 with gcc. No-one wants to fix gcc because that causes
13089 incompatibility with assemblers... You can use the option of
13090 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13091 #define SYSV386_COMPAT 1
13095 output_387_binary_op (rtx insn, rtx *operands)
13097 static char buf[40];
13100 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13102 #ifdef ENABLE_CHECKING
13103 /* Even if we do not want to check the inputs, this documents the
13104 input constraints, which helps in understanding the following code. */
13105 if (STACK_REG_P (operands[0])
13106 && ((REG_P (operands[1])
13107 && REGNO (operands[0]) == REGNO (operands[1])
13108 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13109 || (REG_P (operands[2])
13110 && REGNO (operands[0]) == REGNO (operands[2])
13111 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13112 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13115 gcc_assert (is_sse);
13118 switch (GET_CODE (operands[3]))
13121 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13122 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13130 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13131 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13139 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13140 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13148 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13149 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13157 gcc_unreachable ();
13164 strcpy (buf, ssep);
13165 if (GET_MODE (operands[0]) == SFmode)
13166 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13168 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13172 strcpy (buf, ssep + 1);
13173 if (GET_MODE (operands[0]) == SFmode)
13174 strcat (buf, "ss\t{%2, %0|%0, %2}");
13176 strcat (buf, "sd\t{%2, %0|%0, %2}");
13182 switch (GET_CODE (operands[3]))
13186 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13188 rtx temp = operands[2];
13189 operands[2] = operands[1];
13190 operands[1] = temp;
13193 /* We now know operands[0] == operands[1]. */
13195 if (MEM_P (operands[2]))
13201 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13203 if (STACK_TOP_P (operands[0]))
13204 /* How is it that we are storing to a dead operand[2]?
13205 Well, presumably operands[1] is dead too. We can't
13206 store the result to st(0) as st(0) gets popped on this
13207 instruction. Instead store to operands[2] (which I
13208 think has to be st(1)). st(1) will be popped later.
13209 gcc <= 2.8.1 didn't have this check and generated
13210 assembly code that the Unixware assembler rejected. */
13211 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13213 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13217 if (STACK_TOP_P (operands[0]))
13218 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13220 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13225 if (MEM_P (operands[1]))
13231 if (MEM_P (operands[2]))
13237 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13240 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13241 derived assemblers, confusingly reverse the direction of
13242 the operation for fsub{r} and fdiv{r} when the
13243 destination register is not st(0). The Intel assembler
13244 doesn't have this brain damage. Read !SYSV386_COMPAT to
13245 figure out what the hardware really does. */
13246 if (STACK_TOP_P (operands[0]))
13247 p = "{p\t%0, %2|rp\t%2, %0}";
13249 p = "{rp\t%2, %0|p\t%0, %2}";
13251 if (STACK_TOP_P (operands[0]))
13252 /* As above for fmul/fadd, we can't store to st(0). */
13253 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13255 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13260 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13263 if (STACK_TOP_P (operands[0]))
13264 p = "{rp\t%0, %1|p\t%1, %0}";
13266 p = "{p\t%1, %0|rp\t%0, %1}";
13268 if (STACK_TOP_P (operands[0]))
13269 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13271 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13276 if (STACK_TOP_P (operands[0]))
13278 if (STACK_TOP_P (operands[1]))
13279 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13281 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13284 else if (STACK_TOP_P (operands[1]))
13287 p = "{\t%1, %0|r\t%0, %1}";
13289 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13295 p = "{r\t%2, %0|\t%0, %2}";
13297 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13303 gcc_unreachable ();
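/* Example of the strings built above (illustrative; assumes ssep for
   MULT is "vmul", as the AVX/non-AVX split suggests): for an SSE
   DFmode multiply without AVX, ssep + 1 skips the "v" prefix and the
   template becomes "mulsd\t{%2, %0|%0, %2}"; with AVX the
   three-operand "vmulsd\t{%2, %1, %0|%0, %1, %2}" form is used.  */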
13310 /* Return needed mode for entity in optimize_mode_switching pass. */
13313 ix86_mode_needed (int entity, rtx insn)
13315 enum attr_i387_cw mode;
13317 /* The mode UNINITIALIZED is used to store the control word after a
13318 function call or ASM pattern. The mode ANY specifies that the
13319 function has no requirements on the control word and makes no
13320 changes to the bits we are interested in. */
13323 || (NONJUMP_INSN_P (insn)
13324 && (asm_noperands (PATTERN (insn)) >= 0
13325 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13326 return I387_CW_UNINITIALIZED;
13328 if (recog_memoized (insn) < 0)
13329 return I387_CW_ANY;
13331 mode = get_attr_i387_cw (insn);
13336 if (mode == I387_CW_TRUNC)
13341 if (mode == I387_CW_FLOOR)
13346 if (mode == I387_CW_CEIL)
13351 if (mode == I387_CW_MASK_PM)
13356 gcc_unreachable ();
13359 return I387_CW_ANY;
13362 /* Output code to initialize control word copies used by trunc?f?i and
13363 rounding patterns. CURRENT_MODE is set to the current control word,
13364 while NEW_MODE is set to the new control word. */
13367 emit_i387_cw_initialization (int mode)
13369 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13372 enum ix86_stack_slot slot;
13374 rtx reg = gen_reg_rtx (HImode);
13376 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13377 emit_move_insn (reg, copy_rtx (stored_mode));
13379 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13380 || optimize_function_for_size_p (cfun))
13384 case I387_CW_TRUNC:
13385 /* round toward zero (truncate) */
13386 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13387 slot = SLOT_CW_TRUNC;
13390 case I387_CW_FLOOR:
13391 /* round down toward -oo */
13392 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13393 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13394 slot = SLOT_CW_FLOOR;
13398 /* round up toward +oo */
13399 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13401 slot = SLOT_CW_CEIL;
13404 case I387_CW_MASK_PM:
13405 /* mask precision exception for nearbyint() */
13406 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13407 slot = SLOT_CW_MASK_PM;
13411 gcc_unreachable ();
13418 case I387_CW_TRUNC:
13419 /* round toward zero (truncate) */
13420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13421 slot = SLOT_CW_TRUNC;
13424 case I387_CW_FLOOR:
13425 /* round down toward -oo */
13426 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13427 slot = SLOT_CW_FLOOR;
13431 /* round up toward +oo */
13432 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13433 slot = SLOT_CW_CEIL;
13436 case I387_CW_MASK_PM:
13437 /* mask precision exception for nearbyint() */
13438 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13439 slot = SLOT_CW_MASK_PM;
13443 gcc_unreachable ();
13447 gcc_assert (slot < MAX_386_STACK_LOCALS);
13449 new_mode = assign_386_stack_local (HImode, slot);
13450 emit_move_insn (new_mode, reg);
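/* A scalar sketch of the control-word arithmetic above (illustrative,
   not part of GCC; the helper name and mode encoding are hypothetical):
   bits 10-11 of the x87 control word are the rounding-control field,
   and bit 5 (0x0020) masks the precision exception.  */
static unsigned short
i387_cw_sketch (unsigned short cw, int rounding)
{
  switch (rounding)
    {
    case 0:  return cw | 0x0c00;              /* truncate: RC = 11 */
    case 1:  return (cw & ~0x0c00) | 0x0400;  /* floor:    RC = 01 */
    case 2:  return (cw & ~0x0c00) | 0x0800;  /* ceil:     RC = 10 */
    default: return cw | 0x0020;              /* mask the PM exception */
    }
}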
13453 /* Output code for INSN to convert a float to a signed int. OPERANDS
13454 are the insn operands. The output may be [HSD]Imode and the input
13455 operand may be [SDX]Fmode. */
13458 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13460 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13461 int dimode_p = GET_MODE (operands[0]) == DImode;
13462 int round_mode = get_attr_i387_cw (insn);
13464 /* Jump through a hoop or two for DImode, since the hardware has no
13465 non-popping instruction. We used to do this a different way, but
13466 that was somewhat fragile and broke with post-reload splitters. */
13467 if ((dimode_p || fisttp) && !stack_top_dies)
13468 output_asm_insn ("fld\t%y1", operands);
13470 gcc_assert (STACK_TOP_P (operands[1]));
13471 gcc_assert (MEM_P (operands[0]));
13472 gcc_assert (GET_MODE (operands[1]) != TFmode);
13475 output_asm_insn ("fisttp%Z0\t%0", operands);
13478 if (round_mode != I387_CW_ANY)
13479 output_asm_insn ("fldcw\t%3", operands);
13480 if (stack_top_dies || dimode_p)
13481 output_asm_insn ("fistp%Z0\t%0", operands);
13483 output_asm_insn ("fist%Z0\t%0", operands);
13484 if (round_mode != I387_CW_ANY)
13485 output_asm_insn ("fldcw\t%2", operands);
13491 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13492 have the values zero or one, indicates the ffreep insn's operand
13493 from the OPERANDS array. */
13495 static const char *
13496 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13498 if (TARGET_USE_FFREEP)
13499 #ifdef HAVE_AS_IX86_FFREEP
13500 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13503 static char retval[32];
13504 int regno = REGNO (operands[opno]);
13506 gcc_assert (FP_REGNO_P (regno));
13508 regno -= FIRST_STACK_REG;
13510 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13515 return opno ? "fstp\t%y1" : "fstp\t%y0";
13519 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13520 should be used. UNORDERED_P is true when fucom should be used. */
13523 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13525 int stack_top_dies;
13526 rtx cmp_op0, cmp_op1;
13527 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13531 cmp_op0 = operands[0];
13532 cmp_op1 = operands[1];
13536 cmp_op0 = operands[1];
13537 cmp_op1 = operands[2];
13542 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13543 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13544 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13545 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13547 if (GET_MODE (operands[0]) == SFmode)
13549 return &ucomiss[TARGET_AVX ? 0 : 1];
13551 return &comiss[TARGET_AVX ? 0 : 1];
13554 return &ucomisd[TARGET_AVX ? 0 : 1];
13556 return &comisd[TARGET_AVX ? 0 : 1];
13559 gcc_assert (STACK_TOP_P (cmp_op0));
13561 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13563 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13565 if (stack_top_dies)
13567 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13568 return output_387_ffreep (operands, 1);
13571 return "ftst\n\tfnstsw\t%0";
13574 if (STACK_REG_P (cmp_op1)
13576 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13577 && REGNO (cmp_op1) != FIRST_STACK_REG)
13579 /* If the top of the 387 stack dies, and the other operand
13580 is also a stack register that dies, then this must be a
13581 `fcompp' float compare. */
13585 /* There is no double popping fcomi variant. Fortunately,
13586 eflags is immune from the fstp's cc clobbering. */
13588 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13590 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13591 return output_387_ffreep (operands, 0);
13596 return "fucompp\n\tfnstsw\t%0";
13598 return "fcompp\n\tfnstsw\t%0";
13603 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13605 static const char * const alt[16] =
13607 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13608 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13609 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13610 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13612 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13613 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13617 "fcomi\t{%y1, %0|%0, %y1}",
13618 "fcomip\t{%y1, %0|%0, %y1}",
13619 "fucomi\t{%y1, %0|%0, %y1}",
13620 "fucomip\t{%y1, %0|%0, %y1}",
13631 mask = eflags_p << 3;
13632 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13633 mask |= unordered_p << 1;
13634 mask |= stack_top_dies;
13636 gcc_assert (mask < 16);
13645 ix86_output_addr_vec_elt (FILE *file, int value)
13647 const char *directive = ASM_LONG;
13651 directive = ASM_QUAD;
13653 gcc_assert (!TARGET_64BIT);
13656 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13660 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13662 const char *directive = ASM_LONG;
13665 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13666 directive = ASM_QUAD;
13668 gcc_assert (!TARGET_64BIT);
13670 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13671 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13672 fprintf (file, "%s%s%d-%s%d\n",
13673 directive, LPREFIX, value, LPREFIX, rel);
13674 else if (HAVE_AS_GOTOFF_IN_DATA)
13675 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13677 else if (TARGET_MACHO)
13679 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13680 machopic_output_function_base_name (file);
13685 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13686 GOT_SYMBOL_NAME, LPREFIX, value);
13689 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13693 ix86_expand_clear (rtx dest)
13697 /* We play register width games, which are only valid after reload. */
13698 gcc_assert (reload_completed);
13700 /* Avoid HImode and its attendant prefix byte. */
13701 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13702 dest = gen_rtx_REG (SImode, REGNO (dest));
13703 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13705 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13706 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13708 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
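/* Usage sketch (the wrapper below is hypothetical; gen_rtx_REG, AX_REG
   and ix86_expand_clear are real GCC entities): clearing %eax after
   reload normally emits "xorl %eax, %eax" with the flags clobber
   attached above.  */
static void
clear_eax_sketch (void)
{
  rtx scratch = gen_rtx_REG (SImode, AX_REG);
  ix86_expand_clear (scratch);
}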
13715 /* X is an unchanging MEM. If it is a constant pool reference, return
13716 the constant pool rtx, else NULL. */
13719 maybe_get_pool_constant (rtx x)
13721 x = ix86_delegitimize_address (XEXP (x, 0));
13723 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13724 return get_pool_constant (x);
13730 ix86_expand_move (enum machine_mode mode, rtx operands[])
13733 enum tls_model model;
13738 if (GET_CODE (op1) == SYMBOL_REF)
13740 model = SYMBOL_REF_TLS_MODEL (op1);
13743 op1 = legitimize_tls_address (op1, model, true);
13744 op1 = force_operand (op1, op0);
13748 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13749 && SYMBOL_REF_DLLIMPORT_P (op1))
13750 op1 = legitimize_dllimport_symbol (op1, false);
13752 else if (GET_CODE (op1) == CONST
13753 && GET_CODE (XEXP (op1, 0)) == PLUS
13754 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13756 rtx addend = XEXP (XEXP (op1, 0), 1);
13757 rtx symbol = XEXP (XEXP (op1, 0), 0);
13760 model = SYMBOL_REF_TLS_MODEL (symbol);
13762 tmp = legitimize_tls_address (symbol, model, true);
13763 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13764 && SYMBOL_REF_DLLIMPORT_P (symbol))
13765 tmp = legitimize_dllimport_symbol (symbol, true);
13769 tmp = force_operand (tmp, NULL);
13770 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13771 op0, 1, OPTAB_DIRECT);
13777 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13779 if (TARGET_MACHO && !TARGET_64BIT)
13784 rtx temp = ((reload_in_progress
13785 || ((op0 && REG_P (op0))
13787 ? op0 : gen_reg_rtx (Pmode));
13788 op1 = machopic_indirect_data_reference (op1, temp);
13789 op1 = machopic_legitimize_pic_address (op1, mode,
13790 temp == op1 ? 0 : temp);
13792 else if (MACHOPIC_INDIRECT)
13793 op1 = machopic_indirect_data_reference (op1, 0);
13801 op1 = force_reg (Pmode, op1);
13802 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13804 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13805 op1 = legitimize_pic_address (op1, reg);
13814 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13815 || !push_operand (op0, mode))
13817 op1 = force_reg (mode, op1);
13819 if (push_operand (op0, mode)
13820 && ! general_no_elim_operand (op1, mode))
13821 op1 = copy_to_mode_reg (mode, op1);
13823 /* Force large constants in 64-bit compilation into a register
13824 so that they can be CSEd. */
13825 if (can_create_pseudo_p ()
13826 && (mode == DImode) && TARGET_64BIT
13827 && immediate_operand (op1, mode)
13828 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13829 && !register_operand (op0, mode)
13831 op1 = copy_to_mode_reg (mode, op1);
13833 if (can_create_pseudo_p ()
13834 && FLOAT_MODE_P (mode)
13835 && GET_CODE (op1) == CONST_DOUBLE)
13837 /* If we are loading a floating point constant to a register,
13838 force the value to memory now, since we'll get better code
13839 out of the back end. */
13841 op1 = validize_mem (force_const_mem (mode, op1));
13842 if (!register_operand (op0, mode))
13844 rtx temp = gen_reg_rtx (mode);
13845 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13846 emit_move_insn (op0, temp);
13852 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13856 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13858 rtx op0 = operands[0], op1 = operands[1];
13859 unsigned int align = GET_MODE_ALIGNMENT (mode);
13861 /* Force constants other than zero into memory. We do not know how
13862 the instructions used to build constants modify the upper 64 bits
13863 of the register; once we have that information, we may be able
13864 to handle some of them more efficiently. */
13865 if (can_create_pseudo_p ()
13866 && register_operand (op0, mode)
13867 && (CONSTANT_P (op1)
13868 || (GET_CODE (op1) == SUBREG
13869 && CONSTANT_P (SUBREG_REG (op1))))
13870 && !standard_sse_constant_p (op1))
13871 op1 = validize_mem (force_const_mem (mode, op1));
13873 /* We need to check memory alignment for SSE modes since attributes
13874 can make operands unaligned. */
13875 if (can_create_pseudo_p ()
13876 && SSE_REG_MODE_P (mode)
13877 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13878 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13882 /* ix86_expand_vector_move_misalign() does not like constants ... */
13883 if (CONSTANT_P (op1)
13884 || (GET_CODE (op1) == SUBREG
13885 && CONSTANT_P (SUBREG_REG (op1))))
13886 op1 = validize_mem (force_const_mem (mode, op1));
13888 /* ... nor both arguments in memory. */
13889 if (!register_operand (op0, mode)
13890 && !register_operand (op1, mode))
13891 op1 = force_reg (mode, op1);
13893 tmp[0] = op0; tmp[1] = op1;
13894 ix86_expand_vector_move_misalign (mode, tmp);
13898 /* Make operand1 a register if it isn't already. */
13899 if (can_create_pseudo_p ()
13900 && !register_operand (op0, mode)
13901 && !register_operand (op1, mode))
13903 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13907 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13910 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13911 straight to ix86_expand_vector_move. */
13912 /* Code generation for scalar reg-reg moves of single and double precision data:
13913 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13917 if (x86_sse_partial_reg_dependency == true)
13922 Code generation for scalar loads of double precision data:
13923 if (x86_sse_split_regs == true)
13924 movlpd mem, reg (gas syntax)
13928 Code generation for unaligned packed loads of single precision data
13929 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13930 if (x86_sse_unaligned_move_optimal)
13933 if (x86_sse_partial_reg_dependency == true)
13945 Code generation for unaligned packed loads of double precision data
13946 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13947 if (x86_sse_unaligned_move_optimal)
13950 if (x86_sse_split_regs == true)
13963 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13972 switch (GET_MODE_CLASS (mode))
13974 case MODE_VECTOR_INT:
13976 switch (GET_MODE_SIZE (mode))
13979 /* If we're optimizing for size, movups is the smallest. */
13980 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13982 op0 = gen_lowpart (V4SFmode, op0);
13983 op1 = gen_lowpart (V4SFmode, op1);
13984 emit_insn (gen_avx_movups (op0, op1));
13987 op0 = gen_lowpart (V16QImode, op0);
13988 op1 = gen_lowpart (V16QImode, op1);
13989 emit_insn (gen_avx_movdqu (op0, op1));
13992 op0 = gen_lowpart (V32QImode, op0);
13993 op1 = gen_lowpart (V32QImode, op1);
13994 emit_insn (gen_avx_movdqu256 (op0, op1));
13997 gcc_unreachable ();
14000 case MODE_VECTOR_FLOAT:
14001 op0 = gen_lowpart (mode, op0);
14002 op1 = gen_lowpart (mode, op1);
14007 emit_insn (gen_avx_movups (op0, op1));
14010 emit_insn (gen_avx_movups256 (op0, op1));
14013 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14015 op0 = gen_lowpart (V4SFmode, op0);
14016 op1 = gen_lowpart (V4SFmode, op1);
14017 emit_insn (gen_avx_movups (op0, op1));
14020 emit_insn (gen_avx_movupd (op0, op1));
14023 emit_insn (gen_avx_movupd256 (op0, op1));
14026 gcc_unreachable ();
14031 gcc_unreachable ();
14039 /* If we're optimizing for size, movups is the smallest. */
14040 if (optimize_insn_for_size_p ()
14041 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14043 op0 = gen_lowpart (V4SFmode, op0);
14044 op1 = gen_lowpart (V4SFmode, op1);
14045 emit_insn (gen_sse_movups (op0, op1));
14049 /* ??? If we have typed data, then it would appear that using
14050 movdqu is the only way to get unaligned data loaded with
14051 integer type. */
14052 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14054 op0 = gen_lowpart (V16QImode, op0);
14055 op1 = gen_lowpart (V16QImode, op1);
14056 emit_insn (gen_sse2_movdqu (op0, op1));
14060 if (TARGET_SSE2 && mode == V2DFmode)
14064 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14066 op0 = gen_lowpart (V2DFmode, op0);
14067 op1 = gen_lowpart (V2DFmode, op1);
14068 emit_insn (gen_sse2_movupd (op0, op1));
14072 /* When SSE registers are split into halves, we can avoid
14073 writing to the top half twice. */
14074 if (TARGET_SSE_SPLIT_REGS)
14076 emit_clobber (op0);
14081 /* ??? Not sure about the best option for the Intel chips.
14082 The following would seem to satisfy; the register is
14083 entirely cleared, breaking the dependency chain. We
14084 then store to the upper half, with a dependency depth
14085 of one. A rumor has it that Intel recommends two movsd
14086 followed by an unpacklpd, but this is unconfirmed. And
14087 given that the dependency depth of the unpacklpd would
14088 still be one, I'm not sure why this would be better. */
14089 zero = CONST0_RTX (V2DFmode);
14092 m = adjust_address (op1, DFmode, 0);
14093 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14094 m = adjust_address (op1, DFmode, 8);
14095 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14099 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14101 op0 = gen_lowpart (V4SFmode, op0);
14102 op1 = gen_lowpart (V4SFmode, op1);
14103 emit_insn (gen_sse_movups (op0, op1));
14107 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14108 emit_move_insn (op0, CONST0_RTX (mode));
14110 emit_clobber (op0);
14112 if (mode != V4SFmode)
14113 op0 = gen_lowpart (V4SFmode, op0);
14114 m = adjust_address (op1, V2SFmode, 0);
14115 emit_insn (gen_sse_loadlps (op0, op0, m));
14116 m = adjust_address (op1, V2SFmode, 8);
14117 emit_insn (gen_sse_loadhps (op0, op0, m));
14120 else if (MEM_P (op0))
14122 /* If we're optimizing for size, movups is the smallest. */
14123 if (optimize_insn_for_size_p ()
14124 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14126 op0 = gen_lowpart (V4SFmode, op0);
14127 op1 = gen_lowpart (V4SFmode, op1);
14128 emit_insn (gen_sse_movups (op0, op1));
14132 /* ??? Similar to above, only less clear because of "typeless
14133 stores". */
14134 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14135 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14137 op0 = gen_lowpart (V16QImode, op0);
14138 op1 = gen_lowpart (V16QImode, op1);
14139 emit_insn (gen_sse2_movdqu (op0, op1));
14143 if (TARGET_SSE2 && mode == V2DFmode)
14145 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14147 op0 = gen_lowpart (V2DFmode, op0);
14148 op1 = gen_lowpart (V2DFmode, op1);
14149 emit_insn (gen_sse2_movupd (op0, op1));
14153 m = adjust_address (op0, DFmode, 0);
14154 emit_insn (gen_sse2_storelpd (m, op1));
14155 m = adjust_address (op0, DFmode, 8);
14156 emit_insn (gen_sse2_storehpd (m, op1));
14161 if (mode != V4SFmode)
14162 op1 = gen_lowpart (V4SFmode, op1);
14164 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14166 op0 = gen_lowpart (V4SFmode, op0);
14167 emit_insn (gen_sse_movups (op0, op1));
14171 m = adjust_address (op0, V2SFmode, 0);
14172 emit_insn (gen_sse_storelps (m, op1));
14173 m = adjust_address (op0, V2SFmode, 8);
14174 emit_insn (gen_sse_storehps (m, op1));
14179 gcc_unreachable ();
14182 /* Expand a push in MODE. This is some mode for which we do not support
14183 proper push instructions, at least from the registers that we expect
14184 the value to live in. */
14187 ix86_expand_push (enum machine_mode mode, rtx x)
14191 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14192 GEN_INT (-GET_MODE_SIZE (mode)),
14193 stack_pointer_rtx, 1, OPTAB_DIRECT);
14194 if (tmp != stack_pointer_rtx)
14195 emit_move_insn (stack_pointer_rtx, tmp);
14197 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14199 /* When we push an operand onto the stack, it has to be aligned at least
14200 at the function argument boundary. However, since we don't have
14201 the argument type, we can't determine the actual argument
14202 boundary. */
14203 emit_move_insn (tmp, x);
14206 /* Helper function of ix86_fixup_binary_operands to canonicalize
14207 operand order. Returns true if the operands should be swapped. */
14210 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14213 rtx dst = operands[0];
14214 rtx src1 = operands[1];
14215 rtx src2 = operands[2];
14217 /* If the operation is not commutative, we can't do anything. */
14218 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14221 /* Highest priority is that src1 should match dst. */
14222 if (rtx_equal_p (dst, src1))
14224 if (rtx_equal_p (dst, src2))
14227 /* Next highest priority is that immediate constants come second. */
14228 if (immediate_operand (src2, mode))
14230 if (immediate_operand (src1, mode))
14233 /* Lowest priority is that memory references should come second. */
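/* Worked example (illustrative): for "a = b + a" the test
   rtx_equal_p (dst, src2) holds, so the sources are swapped to
   "a = a + b", which matches the two-address x86 form "add b, a"
   without an extra copy.  */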
14243 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14244 destination to use for the operation. If different from the true
14245 destination in operands[0], a copy operation will be required. */
14248 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14251 rtx dst = operands[0];
14252 rtx src1 = operands[1];
14253 rtx src2 = operands[2];
14255 /* Canonicalize operand order. */
14256 if (ix86_swap_binary_operands_p (code, mode, operands))
14260 /* It is invalid to swap operands of different modes. */
14261 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14268 /* Both source operands cannot be in memory. */
14269 if (MEM_P (src1) && MEM_P (src2))
14271 /* Optimization: Only read from memory once. */
14272 if (rtx_equal_p (src1, src2))
14274 src2 = force_reg (mode, src2);
14278 src2 = force_reg (mode, src2);
14281 /* If the destination is memory, and we do not have matching source
14282 operands, do things in registers. */
14283 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14284 dst = gen_reg_rtx (mode);
14286 /* Source 1 cannot be a constant. */
14287 if (CONSTANT_P (src1))
14288 src1 = force_reg (mode, src1);
14290 /* Source 1 cannot be a non-matching memory. */
14291 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14292 src1 = force_reg (mode, src1);
14294 operands[1] = src1;
14295 operands[2] = src2;
14299 /* Similarly, but assume that the destination has already been
14300 set up properly. */
14303 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14304 enum machine_mode mode, rtx operands[])
14306 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14307 gcc_assert (dst == operands[0]);
14310 /* Attempt to expand a binary operator. Make the expansion closer to the
14311 actual machine than just general_operand, which will allow 3 separate
14312 memory references (one output, two input) in a single insn. */
14315 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14318 rtx src1, src2, dst, op, clob;
14320 dst = ix86_fixup_binary_operands (code, mode, operands);
14321 src1 = operands[1];
14322 src2 = operands[2];
14324 /* Emit the instruction. */
14326 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14327 if (reload_in_progress)
14329 /* Reload doesn't know about the flags register, and doesn't know that
14330 it doesn't want to clobber it. We can only do this with PLUS. */
14331 gcc_assert (code == PLUS);
14336 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14337 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14340 /* Fix up the destination if needed. */
14341 if (dst != operands[0])
14342 emit_move_insn (operands[0], dst);
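/* Typical use (a simplified sketch of the machine-description side):
   an add expander in i386.md can simply call
     ix86_expand_binary_operator (PLUS, SImode, operands);
   and rely on the fixup logic above to force operands into places the
   insn patterns accept.  */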
14345 /* Return TRUE or FALSE depending on whether the binary operator meets the
14346 appropriate constraints. */
14349 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14352 rtx dst = operands[0];
14353 rtx src1 = operands[1];
14354 rtx src2 = operands[2];
14356 /* Both source operands cannot be in memory. */
14357 if (MEM_P (src1) && MEM_P (src2))
14360 /* Canonicalize operand order for commutative operators. */
14361 if (ix86_swap_binary_operands_p (code, mode, operands))
14368 /* If the destination is memory, we must have a matching source operand. */
14369 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14372 /* Source 1 cannot be a constant. */
14373 if (CONSTANT_P (src1))
14376 /* Source 1 cannot be a non-matching memory. */
14377 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14383 /* Attempt to expand a unary operator. Make the expansion closer to the
14384 actual machine than just general_operand, which will allow 2 separate
14385 memory references (one output, one input) in a single insn. */
14388 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14391 int matching_memory;
14392 rtx src, dst, op, clob;
14397 /* If the destination is memory, and we do not have matching source
14398 operands, do things in registers. */
14399 matching_memory = 0;
14402 if (rtx_equal_p (dst, src))
14403 matching_memory = 1;
14405 dst = gen_reg_rtx (mode);
14408 /* When source operand is memory, destination must match. */
14409 if (MEM_P (src) && !matching_memory)
14410 src = force_reg (mode, src);
14412 /* Emit the instruction. */
14414 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14415 if (reload_in_progress || code == NOT)
14417 /* Reload doesn't know about the flags register, and doesn't know that
14418 it doesn't want to clobber it. */
14419 gcc_assert (code == NOT);
14424 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14425 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14428 /* Fix up the destination if needed. */
14429 if (dst != operands[0])
14430 emit_move_insn (operands[0], dst);
14433 #define LEA_SEARCH_THRESHOLD 12
14435 /* Search backward for a non-AGU definition of register number REGNO1
14436 or register number REGNO2 in INSN's basic block until we:
14437 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14438 2. Reach the BB boundary, or
14439 3. Reach an AGU definition.
14440 Return the distance between the non-AGU definition point and INSN.
14441 If there is no definition point, return -1. */
14444 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14447 basic_block bb = BLOCK_FOR_INSN (insn);
14450 enum attr_type insn_type;
14452 if (insn != BB_HEAD (bb))
14454 rtx prev = PREV_INSN (insn);
14455 while (prev && distance < LEA_SEARCH_THRESHOLD)
14457 if (NONDEBUG_INSN_P (prev))
14460 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14461 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14462 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14463 && (regno1 == DF_REF_REGNO (*def_rec)
14464 || regno2 == DF_REF_REGNO (*def_rec)))
14466 insn_type = get_attr_type (prev);
14467 if (insn_type != TYPE_LEA)
14471 if (prev == BB_HEAD (bb))
14473 prev = PREV_INSN (prev);
14477 if (distance < LEA_SEARCH_THRESHOLD)
14481 bool simple_loop = false;
14483 FOR_EACH_EDGE (e, ei, bb->preds)
14486 simple_loop = true;
14492 rtx prev = BB_END (bb);
14495 && distance < LEA_SEARCH_THRESHOLD)
14497 if (NONDEBUG_INSN_P (prev))
14500 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14501 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14502 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14503 && (regno1 == DF_REF_REGNO (*def_rec)
14504 || regno2 == DF_REF_REGNO (*def_rec)))
14506 insn_type = get_attr_type (prev);
14507 if (insn_type != TYPE_LEA)
14511 prev = PREV_INSN (prev);
14519 /* get_attr_type may modify recog data. We want to make sure
14520 that recog data is valid for instruction INSN, on which
14521 distance_non_agu_define is called. INSN is unchanged here. */
14522 extract_insn_cached (insn);
14526 /* Return the distance between INSN and the next insn that uses
14527 register number REGNO0 in a memory address. Return -1 if no such
14528 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14531 distance_agu_use (unsigned int regno0, rtx insn)
14533 basic_block bb = BLOCK_FOR_INSN (insn);
14538 if (insn != BB_END (bb))
14540 rtx next = NEXT_INSN (insn);
14541 while (next && distance < LEA_SEARCH_THRESHOLD)
14543 if (NONDEBUG_INSN_P (next))
14547 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14548 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14549 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14550 && regno0 == DF_REF_REGNO (*use_rec))
14552 /* Return DISTANCE if OP0 is used in memory
14553 address in NEXT. */
14557 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14558 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14559 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14560 && regno0 == DF_REF_REGNO (*def_rec))
14562 /* Return -1 if OP0 is set in NEXT. */
14566 if (next == BB_END (bb))
14568 next = NEXT_INSN (next);
14572 if (distance < LEA_SEARCH_THRESHOLD)
14576 bool simple_loop = false;
14578 FOR_EACH_EDGE (e, ei, bb->succs)
14581 simple_loop = true;
14587 rtx next = BB_HEAD (bb);
14590 && distance < LEA_SEARCH_THRESHOLD)
14592 if (NONDEBUG_INSN_P (next))
14596 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14597 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14598 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14599 && regno0 == DF_REF_REGNO (*use_rec))
14601 /* Return DISTANCE if OP0 is used in memory
14602 address in NEXT. */
14606 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14607 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14608 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14609 && regno0 == DF_REF_REGNO (*def_rec))
14611 /* Return -1 if OP0 is set in NEXT. */
14616 next = NEXT_INSN (next);
14624 /* Define this macro to tune the LEA priority vs. ADD; it takes effect
14625 when there is a choice between LEA and ADD.
14626 Negative value: ADD is preferred over LEA.
14628 Positive value: LEA is preferred over ADD. */
14629 #define IX86_LEA_PRIORITY 2
14631 /* Return true if it is ok to optimize an ADD operation to a LEA
14632 operation to avoid flag register consumption. For processors
14633 like ATOM, if the destination register of the LEA holds an actual
14634 address which will be used soon, LEA is better, and otherwise ADD
14635 is better. */
14638 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14639 rtx insn, rtx operands[])
14641 unsigned int regno0 = true_regnum (operands[0]);
14642 unsigned int regno1 = true_regnum (operands[1]);
14643 unsigned int regno2;
14645 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14646 return regno0 != regno1;
14648 regno2 = true_regnum (operands[2]);
14650 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14651 if (regno0 != regno1 && regno0 != regno2)
14655 int dist_define, dist_use;
14656 dist_define = distance_non_agu_define (regno1, regno2, insn);
14657 if (dist_define <= 0)
14660 /* If this insn has both a backward non-AGU dependence and a forward
14661 AGU dependence, the one with the shorter distance takes effect. */
14662 dist_use = distance_agu_use (regno0, insn);
14664 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
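/* Worked example (illustrative): with IX86_LEA_PRIORITY == 2, if the
   nearest non-AGU definition of an input is 3 insns back and the
   result feeds an address 4 insns ahead, then 3 + 2 >= 4 and the
   nearby AGU use wins, so the LEA form is chosen; if the use were
   6 insns ahead instead, the backward dependence would dominate and
   ADD would be kept.  */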
14671 /* Return true if the destination reg of SET_BODY is the shift count
14672 of USE_BODY. */
14675 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14681 /* Retrieve destination of SET_BODY. */
14682 switch (GET_CODE (set_body))
14685 set_dest = SET_DEST (set_body);
14686 if (!set_dest || !REG_P (set_dest))
14690 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14691 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14699 /* Retrieve shift count of USE_BODY. */
14700 switch (GET_CODE (use_body))
14703 shift_rtx = XEXP (use_body, 1);
14706 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14707 if (ix86_dep_by_shift_count_body (set_body,
14708 XVECEXP (use_body, 0, i)))
14716 && (GET_CODE (shift_rtx) == ASHIFT
14717 || GET_CODE (shift_rtx) == LSHIFTRT
14718 || GET_CODE (shift_rtx) == ASHIFTRT
14719 || GET_CODE (shift_rtx) == ROTATE
14720 || GET_CODE (shift_rtx) == ROTATERT))
14722 rtx shift_count = XEXP (shift_rtx, 1);
14724 /* Return true if shift count is dest of SET_BODY. */
14725 if (REG_P (shift_count)
14726 && true_regnum (set_dest) == true_regnum (shift_count))
14733 /* Return true if the destination reg of SET_INSN is the shift count
14734 of USE_INSN. */
14737 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14739 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14740 PATTERN (use_insn));
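/* Example (illustrative): if SET_INSN writes %ecx and USE_INSN is a
   variable shift such as "sall %cl, %eax", the shift count of
   USE_BODY is the destination of SET_BODY, so this returns true.  */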
14743 /* Return TRUE or FALSE depending on whether the unary operator meets the
14744 appropriate constraints. */
14747 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14748 enum machine_mode mode ATTRIBUTE_UNUSED,
14749 rtx operands[2] ATTRIBUTE_UNUSED)
14751 /* If one of the operands is memory, source and destination must match. */
14752 if ((MEM_P (operands[0])
14753 || MEM_P (operands[1]))
14754 && ! rtx_equal_p (operands[0], operands[1]))
14759 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14760 are ok, keeping in mind the possible movddup alternative. */
14763 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14765 if (MEM_P (operands[0]))
14766 return rtx_equal_p (operands[0], operands[1 + high]);
14767 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14768 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14772 /* Post-reload splitter for converting an SF or DFmode value in an
14773 SSE register into an unsigned SImode value. */
14776 ix86_split_convert_uns_si_sse (rtx operands[])
14778 enum machine_mode vecmode;
14779 rtx value, large, zero_or_two31, input, two31, x;
14781 large = operands[1];
14782 zero_or_two31 = operands[2];
14783 input = operands[3];
14784 two31 = operands[4];
14785 vecmode = GET_MODE (large);
14786 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14788 /* Load up the value into the low element. We must ensure that the other
14789 elements are valid floats -- zero is the easiest such value. */
14792 if (vecmode == V4SFmode)
14793 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14795 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14799 input = gen_rtx_REG (vecmode, REGNO (input));
14800 emit_move_insn (value, CONST0_RTX (vecmode));
14801 if (vecmode == V4SFmode)
14802 emit_insn (gen_sse_movss (value, value, input));
14804 emit_insn (gen_sse2_movsd (value, value, input));
14807 emit_move_insn (large, two31);
14808 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14810 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14811 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14813 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14814 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14816 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14817 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14819 large = gen_rtx_REG (V4SImode, REGNO (large));
14820 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14822 x = gen_rtx_REG (V4SImode, REGNO (value));
14823 if (vecmode == V4SFmode)
14824 emit_insn (gen_sse2_cvttps2dq (x, value));
14826 emit_insn (gen_sse2_cvttpd2dq (x, value));
14829 emit_insn (gen_xorv4si3 (value, value, large));
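/* Scalar model of the split above (illustrative, not part of GCC):
   convert a nonnegative value below 2^32 to unsigned int using only a
   signed truncating convert, exactly as the vector code does with its
   compare/and/sub/shift/cvtt/xor sequence.  */
static unsigned int
uns_si_from_double_sketch (double x)
{
  int large = x >= 2147483648.0;            /* the LE mask */
  double adj = large ? 2147483648.0 : 0.0;  /* zero_or_two31 */
  int t = (int) (x - adj);                  /* now in signed range */
  return (unsigned int) t ^ (large ? 0x80000000u : 0u); /* restore bit 31 */
}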
14832 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14833 Expects the 64-bit DImode to be supplied in a pair of integral
14834 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14835 -mfpmath=sse, !optimize_size only. */
14838 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14840 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14841 rtx int_xmm, fp_xmm;
14842 rtx biases, exponents;
14845 int_xmm = gen_reg_rtx (V4SImode);
14846 if (TARGET_INTER_UNIT_MOVES)
14847 emit_insn (gen_movdi_to_sse (int_xmm, input));
14848 else if (TARGET_SSE_SPLIT_REGS)
14850 emit_clobber (int_xmm);
14851 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14855 x = gen_reg_rtx (V2DImode);
14856 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14857 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14860 x = gen_rtx_CONST_VECTOR (V4SImode,
14861 gen_rtvec (4, GEN_INT (0x43300000UL),
14862 GEN_INT (0x45300000UL),
14863 const0_rtx, const0_rtx));
14864 exponents = validize_mem (force_const_mem (V4SImode, x));
14866 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14867 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14869 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14870 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14871 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14872 (0x1.0p84 + double(fp_value_hi_xmm)).
14873 Note these exponents differ by 32. */
14875 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14877 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14878 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14879 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14880 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14881 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14882 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14883 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14884 biases = validize_mem (force_const_mem (V2DFmode, biases));
14885 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14887 /* Add the upper and lower DFmode values together. */
14889 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14892 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14893 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14894 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14897 ix86_expand_vector_extract (false, target, fp_xmm, 0);
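/* Scalar model of the exponent trick above (illustrative; assumes
   little-endian IEEE doubles and C99 hex floats): bit-paste the two
   halves behind the 2^52 and 2^84 exponents, subtract the biases,
   and add the partial results.  */
static double
uns_didf_sketch (unsigned int lo, unsigned int hi)
{
  union { unsigned long long u; double d; } dlo, dhi;

  dlo.u = 0x4330000000000000ULL | lo;   /* == 0x1.0p52 + lo */
  dhi.u = 0x4530000000000000ULL | hi;   /* == 0x1.0p84 + hi * 2^32 */
  return (dlo.d - 0x1.0p52) + (dhi.d - 0x1.0p84);
}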
14900 /* Not used, but eases macroization of patterns. */
14902 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14903 rtx input ATTRIBUTE_UNUSED)
14905 gcc_unreachable ();
14908 /* Convert an unsigned SImode value into a DFmode. Only currently used
14909 for SSE, but applicable anywhere. */
14912 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14914 REAL_VALUE_TYPE TWO31r;
14917 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14918 NULL, 1, OPTAB_DIRECT);
14920 fp = gen_reg_rtx (DFmode);
14921 emit_insn (gen_floatsidf2 (fp, x));
14923 real_ldexp (&TWO31r, &dconst1, 31);
14924 x = const_double_from_real_value (TWO31r, DFmode);
14926 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14928 emit_move_insn (target, x);
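/* Scalar model (illustrative): the PLUS of -2^31 above turns the
   unsigned value into a signed one that an ordinary signed convert
   can handle; adding 2^31 back as a double restores the result.  The
   cast below relies on two's-complement wraparound, which the RTL
   form expresses exactly.  */
static double
uns_sidf_sketch (unsigned int u)
{
  int biased = (int) (u + 0x80000000u);   /* u - 2^31, wrapped */
  return (double) biased + 2147483648.0;
}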
14931 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14932 32-bit mode; otherwise we have a direct convert instruction. */
14935 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14937 REAL_VALUE_TYPE TWO32r;
14938 rtx fp_lo, fp_hi, x;
14940 fp_lo = gen_reg_rtx (DFmode);
14941 fp_hi = gen_reg_rtx (DFmode);
14943 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14945 real_ldexp (&TWO32r, &dconst1, 32);
14946 x = const_double_from_real_value (TWO32r, DFmode);
14947 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14949 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14951 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14954 emit_move_insn (target, x);
14957 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14958 For x86_32, -mfpmath=sse, !optimize_size only. */
14960 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14962 REAL_VALUE_TYPE ONE16r;
14963 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14965 real_ldexp (&ONE16r, &dconst1, 16);
14966 x = const_double_from_real_value (ONE16r, SFmode);
14967 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14968 NULL, 0, OPTAB_DIRECT);
14969 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14970 NULL, 0, OPTAB_DIRECT);
14971 fp_hi = gen_reg_rtx (SFmode);
14972 fp_lo = gen_reg_rtx (SFmode);
14973 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14974 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14975 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14977 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14979 if (!rtx_equal_p (target, fp_hi))
14980 emit_move_insn (target, fp_hi);
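/* Scalar model (illustrative): each 16-bit half converts to SFmode
   exactly, and the multiply by 2^16 is exact, so rounding happens
   only at the final addition, mirroring the ONE16r scaling above.  */
static float
uns_sisf_sketch (unsigned int u)
{
  float fp_lo = (float) (int) (u & 0xffff);
  float fp_hi = (float) (int) (u >> 16);
  return fp_hi * 65536.0f + fp_lo;
}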
14983 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14984 then replicate the value for all elements of the vector
14985 register. */
14988 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14995 v = gen_rtvec (4, value, value, value, value);
14996 return gen_rtx_CONST_VECTOR (V4SImode, v);
15000 v = gen_rtvec (2, value, value);
15001 return gen_rtx_CONST_VECTOR (V2DImode, v);
15005 v = gen_rtvec (4, value, value, value, value);
15007 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15008 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15009 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15013 v = gen_rtvec (2, value, value);
15015 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15016 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15019 gcc_unreachable ();
15023 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15024 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15025 for an SSE register. If VECT is true, then replicate the mask for
15026 all elements of the vector register. If INVERT is true, then create
15027 a mask excluding the sign bit. */
15030 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15032 enum machine_mode vec_mode, imode;
15033 HOST_WIDE_INT hi, lo;
15038 /* Find the sign bit, sign extended to 2*HWI. */
15044 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15045 lo = 0x80000000, hi = lo < 0;
15051 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15052 if (HOST_BITS_PER_WIDE_INT >= 64)
15053 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15055 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15060 vec_mode = VOIDmode;
15061 if (HOST_BITS_PER_WIDE_INT >= 64)
15064 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15071 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15075 lo = ~lo, hi = ~hi;
15081 mask = immed_double_const (lo, hi, imode);
15083 vec = gen_rtvec (2, v, mask);
15084 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15085 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15092 gcc_unreachable ();
15096 lo = ~lo, hi = ~hi;
15098 /* Force this value into the low part of a fp vector constant. */
15099 mask = immed_double_const (lo, hi, imode);
15100 mask = gen_lowpart (mode, mask);
15102 if (vec_mode == VOIDmode)
15103 return force_reg (mode, mask);
15105 v = ix86_build_const_vector (mode, vect, mask);
15106 return force_reg (vec_mode, v);
15109 /* Generate code for floating point ABS or NEG. */
15112 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15115 rtx mask, set, use, clob, dst, src;
15116 bool use_sse = false;
15117 bool vector_mode = VECTOR_MODE_P (mode);
15118 enum machine_mode elt_mode = mode;
15122 elt_mode = GET_MODE_INNER (mode);
15125 else if (mode == TFmode)
15127 else if (TARGET_SSE_MATH)
15128 use_sse = SSE_FLOAT_MODE_P (mode);
15130 /* NEG and ABS performed with SSE use bitwise mask operations.
15131 Create the appropriate mask now. */
15133 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15142 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15143 set = gen_rtx_SET (VOIDmode, dst, set);
15148 set = gen_rtx_fmt_e (code, mode, src);
15149 set = gen_rtx_SET (VOIDmode, dst, set);
15152 use = gen_rtx_USE (VOIDmode, mask);
15153 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15154 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15155 gen_rtvec (3, set, use, clob)));
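/* Scalar model of the SSE mask operations (illustrative): NEG flips
   the sign bit with XOR, ABS clears it with AND, matching the
   invert = (code == ABS) mask built above.  */
static float
fp_absneg_sketch (float x, int is_abs)
{
  union { unsigned int u; float f; } v;

  v.f = x;
  if (is_abs)
    v.u &= 0x7fffffffu;   /* AND with the inverted sign-bit mask */
  else
    v.u ^= 0x80000000u;   /* XOR with the sign-bit mask */
  return v.f;
}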
15162 /* Expand a copysign operation. Special case operand 0 being a constant. */
15165 ix86_expand_copysign (rtx operands[])
15167 enum machine_mode mode;
15168 rtx dest, op0, op1, mask, nmask;
15170 dest = operands[0];
15174 mode = GET_MODE (dest);
15176 if (GET_CODE (op0) == CONST_DOUBLE)
15178 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15180 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15181 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15183 if (mode == SFmode || mode == DFmode)
15185 enum machine_mode vmode;
15187 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15189 if (op0 == CONST0_RTX (mode))
15190 op0 = CONST0_RTX (vmode);
15193 rtx v = ix86_build_const_vector (mode, false, op0);
15195 op0 = force_reg (vmode, v);
15198 else if (op0 != CONST0_RTX (mode))
15199 op0 = force_reg (mode, op0);
15201 mask = ix86_build_signbit_mask (mode, 0, 0);
15203 if (mode == SFmode)
15204 copysign_insn = gen_copysignsf3_const;
15205 else if (mode == DFmode)
15206 copysign_insn = gen_copysigndf3_const;
15208 copysign_insn = gen_copysigntf3_const;
15210 emit_insn (copysign_insn (dest, op0, op1, mask));
15214 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15216 nmask = ix86_build_signbit_mask (mode, 0, 1);
15217 mask = ix86_build_signbit_mask (mode, 0, 0);
15219 if (mode == SFmode)
15220 copysign_insn = gen_copysignsf3_var;
15221 else if (mode == DFmode)
15222 copysign_insn = gen_copysigndf3_var;
15224 copysign_insn = gen_copysigntf3_var;
15226 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
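/* Scalar model of the two-mask variant (illustrative): the result is
   (magnitude & ~signmask) | (sign-source & signmask), which is what
   ix86_split_copysign_var expands to below.  */
static double
copysign_sketch (double mag, double sgn)
{
  union { unsigned long long u; double d; } m, s;

  m.d = mag;
  s.d = sgn;
  m.u = (m.u & 0x7fffffffffffffffULL) | (s.u & 0x8000000000000000ULL);
  return m.d;
}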
15230 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15231 be a constant, and so has already been expanded into a vector constant. */
15234 ix86_split_copysign_const (rtx operands[])
15236 enum machine_mode mode, vmode;
15237 rtx dest, op0, mask, x;
15239 dest = operands[0];
15241 mask = operands[3];
15243 mode = GET_MODE (dest);
15244 vmode = GET_MODE (mask);
15246 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15247 x = gen_rtx_AND (vmode, dest, mask);
15248 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15250 if (op0 != CONST0_RTX (vmode))
15252 x = gen_rtx_IOR (vmode, dest, op0);
15253 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15257 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15258 so we have to do two masks. */
15261 ix86_split_copysign_var (rtx operands[])
15263 enum machine_mode mode, vmode;
15264 rtx dest, scratch, op0, op1, mask, nmask, x;
15266 dest = operands[0];
15267 scratch = operands[1];
15270 nmask = operands[4];
15271 mask = operands[5];
15273 mode = GET_MODE (dest);
15274 vmode = GET_MODE (mask);
15276 if (rtx_equal_p (op0, op1))
15278 /* Shouldn't happen often (it's useless, obviously), but when it does
15279 we'd generate incorrect code if we continue below. */
15280 emit_move_insn (dest, op0);
15284 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15286 gcc_assert (REGNO (op1) == REGNO (scratch));
15288 x = gen_rtx_AND (vmode, scratch, mask);
15289 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15292 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15293 x = gen_rtx_NOT (vmode, dest);
15294 x = gen_rtx_AND (vmode, x, op0);
15295 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15299 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15301 x = gen_rtx_AND (vmode, scratch, mask);
15303 else /* alternative 2,4 */
15305 gcc_assert (REGNO (mask) == REGNO (scratch));
15306 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15307 x = gen_rtx_AND (vmode, scratch, op1);
15309 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15311 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15313 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15314 x = gen_rtx_AND (vmode, dest, nmask);
15316 else /* alternative 3,4 */
15318 gcc_assert (REGNO (nmask) == REGNO (dest));
15320 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15321 x = gen_rtx_AND (vmode, dest, op0);
15323 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15326 x = gen_rtx_IOR (vmode, dest, scratch);
15327 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15330 /* Return TRUE or FALSE depending on whether the first SET in INSN
15331 has source and destination with matching CC modes, and that the
15332 CC mode is at least as constrained as REQ_MODE. */
15335 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15338 enum machine_mode set_mode;
15340 set = PATTERN (insn);
15341 if (GET_CODE (set) == PARALLEL)
15342 set = XVECEXP (set, 0, 0);
15343 gcc_assert (GET_CODE (set) == SET);
15344 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15346 set_mode = GET_MODE (SET_DEST (set));
15350 if (req_mode != CCNOmode
15351 && (req_mode != CCmode
15352 || XEXP (SET_SRC (set), 1) != const0_rtx))
15356 if (req_mode == CCGCmode)
15360 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15364 if (req_mode == CCZmode)
15375 gcc_unreachable ();
15378 return (GET_MODE (SET_SRC (set)) == set_mode);
15381 /* Generate insn patterns to do an integer compare of OPERANDS. */
15384 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15386 enum machine_mode cmpmode;
15389 cmpmode = SELECT_CC_MODE (code, op0, op1);
15390 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15392 /* This is very simple, but making the interface the same as in the
15393 FP case makes the rest of the code easier. */
15394 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15395 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15397 /* Return the test that should be put into the flags user, i.e.
15398 the bcc, scc, or cmov instruction. */
15399 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15402 /* Figure out whether to use ordered or unordered fp comparisons.
15403 Return the appropriate mode to use. */
15406 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15408 /* ??? In order to make all comparisons reversible, we do all comparisons
15409 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15410 all forms of trapping and nontrapping comparisons, we can make inequality
15411 comparisons trapping again, since it results in better code when using
15412 FCOM based compares. */
15413 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15417 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15419 enum machine_mode mode = GET_MODE (op0);
15421 if (SCALAR_FLOAT_MODE_P (mode))
15423 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15424 return ix86_fp_compare_mode (code);
15429 /* Only zero flag is needed. */
15430 case EQ: /* ZF=0 */
15431 case NE: /* ZF!=0 */
15433 /* Codes needing carry flag. */
15434 case GEU: /* CF=0 */
15435 case LTU: /* CF=1 */
15436 /* Detect overflow checks. They need just the carry flag. */
15437 if (GET_CODE (op0) == PLUS
15438 && rtx_equal_p (op1, XEXP (op0, 0)))
15442 case GTU: /* CF=0 & ZF=0 */
15443 case LEU: /* CF=1 | ZF=1 */
15444 /* Detect overflow checks. They need just the carry flag. */
15445 if (GET_CODE (op0) == MINUS
15446 && rtx_equal_p (op1, XEXP (op0, 0)))
15450 /* Codes possibly doable only with the sign flag when
15451 comparing against zero. */
15452 case GE: /* SF=OF or SF=0 */
15453 case LT: /* SF<>OF or SF=1 */
15454 if (op1 == const0_rtx)
15457 /* For other cases the carry flag is not required. */
15459 /* Codes doable only with the sign flag when comparing
15460 against zero, but we lack a jump instruction for it,
15461 so we need to use relational tests against overflow,
15462 which therefore needs to be zero. */
15463 case GT: /* ZF=0 & SF=OF */
15464 case LE: /* ZF=1 | SF<>OF */
15465 if (op1 == const0_rtx)
15469 /* The strcmp pattern does (use flags), and combine may ask us for the proper mode. */
15474 gcc_unreachable ();
15478 /* Return the fixed registers used for condition codes. */
15481 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15488 /* If two condition code modes are compatible, return a condition code
15489 mode which is compatible with both. Otherwise, return VOIDmode. */
15492 static enum machine_mode
15493 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15498 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15501 if ((m1 == CCGCmode && m2 == CCGOCmode)
15502 || (m1 == CCGOCmode && m2 == CCGCmode))
15508 gcc_unreachable ();
15538 /* These are only compatible with themselves, which we already know. */
15545 /* Return a comparison we can do that is equivalent to
15546 swap_condition (code), apart possibly from orderedness.
15547 But never change orderedness if TARGET_IEEE_FP, returning
15548 UNKNOWN in that case if necessary. */
15550 static enum rtx_code
15551 ix86_fp_swap_condition (enum rtx_code code)
15555 case GT: /* GTU - CF=0 & ZF=0 */
15556 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15557 case GE: /* GEU - CF=0 */
15558 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15559 case UNLT: /* LTU - CF=1 */
15560 return TARGET_IEEE_FP ? UNKNOWN : GT;
15561 case UNLE: /* LEU - CF=1 | ZF=1 */
15562 return TARGET_IEEE_FP ? UNKNOWN : GE;
15564 return swap_condition (code);
15568 /* Return the cost of comparison CODE using the best strategy for performance.
15569 All following functions use the number of instructions as a cost metric.
15570 In the future this should be tweaked to compute bytes for optimize_size and
15571 take into account the performance of various instructions on various CPUs. */
15574 ix86_fp_comparison_cost (enum rtx_code code)
15578 /* The cost of code using bit-twiddling on %ah. */
15595 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15599 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15602 gcc_unreachable ();
15605 switch (ix86_fp_comparison_strategy (code))
15607 case IX86_FPCMP_COMI:
15608 return arith_cost > 4 ? 3 : 2;
15609 case IX86_FPCMP_SAHF:
15610 return arith_cost > 4 ? 4 : 3;
15616 /* Return the strategy to use for floating-point comparisons. We assume that
15617 fcomi is always preferable where available, since that is also true when looking at size
15618 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15620 enum ix86_fpcmp_strategy
15621 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15623 /* Do fcomi/sahf based test when profitable. */
15626 return IX86_FPCMP_COMI;
15628 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15629 return IX86_FPCMP_SAHF;
15631 return IX86_FPCMP_ARITH;
15634 /* Swap, force into registers, or otherwise massage the two operands
15635 to a fp comparison. The operands are updated in place; the new
15636 comparison code is returned. */
15638 static enum rtx_code
15639 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15641 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15642 rtx op0 = *pop0, op1 = *pop1;
15643 enum machine_mode op_mode = GET_MODE (op0);
15644 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15646 /* All of the unordered compare instructions only work on registers.
15647 The same is true of the fcomi compare instructions. The XFmode
15648 compare instructions require registers except when comparing
15649 against zero or when converting operand 1 from fixed point to floating point. */
15653 && (fpcmp_mode == CCFPUmode
15654 || (op_mode == XFmode
15655 && ! (standard_80387_constant_p (op0) == 1
15656 || standard_80387_constant_p (op1) == 1)
15657 && GET_CODE (op1) != FLOAT)
15658 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15660 op0 = force_reg (op_mode, op0);
15661 op1 = force_reg (op_mode, op1);
15665 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15666 things around if they appear profitable, otherwise force op0
15667 into a register. */
15669 if (standard_80387_constant_p (op0) == 0
15671 && ! (standard_80387_constant_p (op1) == 0
15674 enum rtx_code new_code = ix86_fp_swap_condition (code);
15675 if (new_code != UNKNOWN)
15678 tmp = op0, op0 = op1, op1 = tmp;
15684 op0 = force_reg (op_mode, op0);
15686 if (CONSTANT_P (op1))
15688 int tmp = standard_80387_constant_p (op1);
15690 op1 = validize_mem (force_const_mem (op_mode, op1));
15694 op1 = force_reg (op_mode, op1);
15697 op1 = force_reg (op_mode, op1);
15701 /* Try to rearrange the comparison to make it cheaper. */
15702 if (ix86_fp_comparison_cost (code)
15703 > ix86_fp_comparison_cost (swap_condition (code))
15704 && (REG_P (op1) || can_create_pseudo_p ()))
15707 tmp = op0, op0 = op1, op1 = tmp;
15708 code = swap_condition (code);
15710 op0 = force_reg (op_mode, op0);
15718 /* Convert the comparison codes we use to represent an FP comparison to the
15719 integer code that will result in a proper branch. Return UNKNOWN if no such code is available. */
15723 ix86_fp_compare_code_to_integer (enum rtx_code code)
15752 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15755 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15757 enum machine_mode fpcmp_mode, intcmp_mode;
15760 fpcmp_mode = ix86_fp_compare_mode (code);
15761 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15763 /* Do fcomi/sahf based test when profitable. */
15764 switch (ix86_fp_comparison_strategy (code))
15766 case IX86_FPCMP_COMI:
15767 intcmp_mode = fpcmp_mode;
15768 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15769 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15774 case IX86_FPCMP_SAHF:
15775 intcmp_mode = fpcmp_mode;
15776 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15777 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15781 scratch = gen_reg_rtx (HImode);
15782 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15783 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15786 case IX86_FPCMP_ARITH:
15787 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15788 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15789 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15791 scratch = gen_reg_rtx (HImode);
15792 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15794 /* In the unordered case, we have to check C2 for NaNs, which
15795 doesn't happen to work out to anything nice combination-wise.
15796 So do some bit twiddling on the value we've got in AH to come
15797 up with an appropriate set of condition codes. */
15799 intcmp_mode = CCNOmode;
15804 if (code == GT || !TARGET_IEEE_FP)
15806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15812 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15813 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15814 intcmp_mode = CCmode;
15820 if (code == LT && TARGET_IEEE_FP)
15822 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15823 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15824 intcmp_mode = CCmode;
15829 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15835 if (code == GE || !TARGET_IEEE_FP)
15837 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15842 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15843 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15849 if (code == LE && TARGET_IEEE_FP)
15851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15852 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15853 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15854 intcmp_mode = CCmode;
15859 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15865 if (code == EQ && TARGET_IEEE_FP)
15867 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15868 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15869 intcmp_mode = CCmode;
15874 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15880 if (code == NE && TARGET_IEEE_FP)
15882 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15883 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15895 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15899 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15904 gcc_unreachable ();
15912 /* Return the test that should be put into the flags user, i.e.
15913 the bcc, scc, or cmov instruction. */
15914 return gen_rtx_fmt_ee (code, VOIDmode,
15915 gen_rtx_REG (intcmp_mode, FLAGS_REG), const0_rtx);
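/* A sketch (not from the sources above) of what the magic masks used
   with the scratch register mean.  After fnstsw the x87 condition
   bits sit in AH: C0 is 0x01, C2 is 0x04, C3 is 0x40, so 0x45 selects
   all three.  For FCOM: greater clears all of them, less sets C0,
   equal sets C3, and an unordered result sets C0, C2 and C3.  */
#if 0
#define FP_C0 0x01	/* Set when ST(0) < operand.  */
#define FP_C2 0x04	/* Set (with C0 and C3) when unordered.  */
#define FP_C3 0x40	/* Set when ST(0) == operand.  */

static int
sketch_fcom_greater (unsigned char ah)
{
  /* Mirrors "test ah, 0x45" above: ZF set means strictly greater.  */
  return (ah & (FP_C0 | FP_C2 | FP_C3)) == 0;
}
#endif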
15920 ix86_expand_compare (enum rtx_code code)
15923 op0 = ix86_compare_op0;
15924 op1 = ix86_compare_op1;
15926 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15927 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15929 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15931 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15932 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15935 ret = ix86_expand_int_compare (code, op0, op1);
15941 ix86_expand_branch (enum rtx_code code, rtx label)
15945 switch (GET_MODE (ix86_compare_op0))
15954 tmp = ix86_expand_compare (code);
15955 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15956 gen_rtx_LABEL_REF (VOIDmode, label),
15958 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15965 /* Expand DImode branch into multiple compare+branch. */
15967 rtx lo[2], hi[2], label2;
15968 enum rtx_code code1, code2, code3;
15969 enum machine_mode submode;
15971 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15973 tmp = ix86_compare_op0;
15974 ix86_compare_op0 = ix86_compare_op1;
15975 ix86_compare_op1 = tmp;
15976 code = swap_condition (code);
15978 if (GET_MODE (ix86_compare_op0) == DImode)
15980 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15981 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15986 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15987 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15991 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15992 avoid two branches. This costs one extra insn, so disable when
15993 optimizing for size. */
15995 if ((code == EQ || code == NE)
15996 && (!optimize_insn_for_size_p ()
15997 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16002 if (hi[1] != const0_rtx)
16003 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16004 NULL_RTX, 0, OPTAB_WIDEN);
16007 if (lo[1] != const0_rtx)
16008 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16009 NULL_RTX, 0, OPTAB_WIDEN);
16011 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16012 NULL_RTX, 0, OPTAB_WIDEN);
16014 ix86_compare_op0 = tmp;
16015 ix86_compare_op1 = const0_rtx;
16016 ix86_expand_branch (code, label);
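/* A minimal C sketch (not from the sources above) of the
   (hi0^hi1)|(lo0^lo1) trick: the OR is zero exactly when both halves
   match, so a single compare against zero decides EQ/NE.  Assumes
   32-bit halves; sketch_di_equal is hypothetical.  */
#if 0
#include <stdint.h>

static int
sketch_di_equal (uint32_t lo0, uint32_t hi0, uint32_t lo1, uint32_t hi1)
{
  return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
}
#endif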
16020 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16021 op1 is a constant and the low word is zero, then we can just
16022 examine the high word. Similarly for low word -1 and
16023 less-or-equal-than or greater-than. */
16025 if (CONST_INT_P (hi[1]))
16028 case LT: case LTU: case GE: case GEU:
16029 if (lo[1] == const0_rtx)
16031 ix86_compare_op0 = hi[0];
16032 ix86_compare_op1 = hi[1];
16033 ix86_expand_branch (code, label);
16037 case LE: case LEU: case GT: case GTU:
16038 if (lo[1] == constm1_rtx)
16040 ix86_compare_op0 = hi[0];
16041 ix86_compare_op1 = hi[1];
16042 ix86_expand_branch (code, label);
16050 /* Otherwise, we need two or three jumps. */
16052 label2 = gen_label_rtx ();
16055 code2 = swap_condition (code);
16056 code3 = unsigned_condition (code);
16060 case LT: case GT: case LTU: case GTU:
16063 case LE: code1 = LT; code2 = GT; break;
16064 case GE: code1 = GT; code2 = LT; break;
16065 case LEU: code1 = LTU; code2 = GTU; break;
16066 case GEU: code1 = GTU; code2 = LTU; break;
16068 case EQ: code1 = UNKNOWN; code2 = NE; break;
16069 case NE: code2 = UNKNOWN; break;
16072 gcc_unreachable ();
16077 * if (hi(a) < hi(b)) goto true;
16078 * if (hi(a) > hi(b)) goto false;
16079 * if (lo(a) < lo(b)) goto true;
16083 ix86_compare_op0 = hi[0];
16084 ix86_compare_op1 = hi[1];
16086 if (code1 != UNKNOWN)
16087 ix86_expand_branch (code1, label);
16088 if (code2 != UNKNOWN)
16089 ix86_expand_branch (code2, label2);
16091 ix86_compare_op0 = lo[0];
16092 ix86_compare_op1 = lo[1];
16093 ix86_expand_branch (code3, label);
16095 if (code2 != UNKNOWN)
16096 emit_label (label2);
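/* A C rendering (not from the sources above) of the jump scheme just
   emitted, shown for a signed double-word a < b with 32-bit halves:
   the high parts decide with a signed test, the low parts break the
   tie with an unsigned one.  sketch_di_less is hypothetical.  */
#if 0
#include <stdint.h>

static int
sketch_di_less (int32_t hi_a, uint32_t lo_a, int32_t hi_b, uint32_t lo_b)
{
  if (hi_a < hi_b)		/* code1: branch to the "true" label.  */
    return 1;
  if (hi_a > hi_b)		/* code2: branch to the "false" label.  */
    return 0;
  return lo_a < lo_b;		/* code3: unsigned test on the low parts.  */
}
#endif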
16101 /* If we have already emitted a compare insn, go straight to simple.
16102 ix86_expand_compare won't emit anything if ix86_compare_emitted is non-NULL. */
16104 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
16109 /* Split branch based on floating point condition. */
16111 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16112 rtx target1, rtx target2, rtx tmp, rtx pushed)
16117 if (target2 != pc_rtx)
16120 code = reverse_condition_maybe_unordered (code);
16125 condition = ix86_expand_fp_compare (code, op1, op2,
16128 /* Remove pushed operand from stack. */
16130 ix86_free_from_memory (GET_MODE (pushed));
16132 i = emit_jump_insn (gen_rtx_SET
16134 gen_rtx_IF_THEN_ELSE (VOIDmode,
16135 condition, target1, target2)));
16136 if (split_branch_probability >= 0)
16137 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16141 ix86_expand_setcc (enum rtx_code code, rtx dest)
16145 gcc_assert (GET_MODE (dest) == QImode);
16147 ret = ix86_expand_compare (code);
16148 PUT_MODE (ret, QImode);
16149 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16152 /* Expand a comparison setting or clearing the carry flag. Return true when
16153 successful, and set *POP to the comparison for the operation. */
16155 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16157 enum machine_mode mode =
16158 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16160 /* Do not handle DImode compares that go through the special path. */
16161 if (mode == (TARGET_64BIT ? TImode : DImode))
16164 if (SCALAR_FLOAT_MODE_P (mode))
16166 rtx compare_op, compare_seq;
16168 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16170 /* Shortcut: the following common codes never translate
16171 into carry-flag compares. */
16172 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16173 || code == ORDERED || code == UNORDERED)
16176 /* These comparisons require the zero flag; swap operands so they won't. */
16177 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16178 && !TARGET_IEEE_FP)
16183 code = swap_condition (code);
16186 /* Try to expand the comparison and verify that we end up with
16187 a carry-flag-based comparison. This fails to be true only when
16188 we decide to expand the comparison using arithmetic, which is
16189 not a common scenario. */
16191 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16192 compare_seq = get_insns ();
16195 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16196 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16197 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16199 code = GET_CODE (compare_op);
16201 if (code != LTU && code != GEU)
16204 emit_insn (compare_seq);
16209 if (!INTEGRAL_MODE_P (mode))
16218 /* Convert a==0 into (unsigned)a<1. */
16221 if (op1 != const0_rtx)
16224 code = (code == EQ ? LTU : GEU);
16227 /* Convert a>b into b<a or a>=b+1. */
16230 if (CONST_INT_P (op1))
16232 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16233 /* Bail out on overflow. We can still swap operands, but that
16234 would force loading of the constant into a register. */
16235 if (op1 == const0_rtx
16236 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16238 code = (code == GTU ? GEU : LTU);
16245 code = (code == GTU ? LTU : GEU);
16249 /* Convert a>=0 into (unsigned)a<0x80000000. */
16252 if (mode == DImode || op1 != const0_rtx)
16254 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16255 code = (code == LT ? GEU : LTU);
16259 if (mode == DImode || op1 != constm1_rtx)
16261 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16262 code = (code == LE ? GEU : LTU);
16268 /* Swapping operands may cause a constant to appear as the first operand. */
16269 if (!nonimmediate_operand (op0, VOIDmode))
16271 if (!can_create_pseudo_p ())
16273 op0 = force_reg (mode, op0);
16275 ix86_compare_op0 = op0;
16276 ix86_compare_op1 = op1;
16277 *pop = ix86_expand_compare (code);
16278 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
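/* The rewrites above as C identities (not from the sources above),
   assuming 32-bit operands; each right-hand side is a single LTU or
   GEU test that the hardware answers with the carry flag.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
sketch_carry_flag_rewrites (uint32_t a, uint32_t b, int32_t s)
{
  assert ((a == 0) == (a < 1));			/* EQ  -> LTU.  */
  assert ((a > b) == (b < a));			/* GTU -> LTU, swapped.  */
  if (b != UINT32_MAX)				/* The code bails out on wrap.  */
    assert ((a > b) == (a >= b + 1));		/* GTU -> GEU.  */
  assert ((s >= 0) == ((uint32_t) s < 0x80000000u));	/* GE 0 -> LTU.  */
}
#endif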
16283 ix86_expand_int_movcc (rtx operands[])
16285 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16286 rtx compare_seq, compare_op;
16287 enum machine_mode mode = GET_MODE (operands[0]);
16288 bool sign_bit_compare_p = false;
16291 ix86_compare_op0 = XEXP (operands[1], 0);
16292 ix86_compare_op1 = XEXP (operands[1], 1);
16293 compare_op = ix86_expand_compare (code);
16294 compare_seq = get_insns ();
16297 compare_code = GET_CODE (compare_op);
16299 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
16300 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
16301 sign_bit_compare_p = true;
16303 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16304 HImode insns, we'd be swallowed in word prefix ops. */
16306 if ((mode != HImode || TARGET_FAST_PREFIX)
16307 && (mode != (TARGET_64BIT ? TImode : DImode))
16308 && CONST_INT_P (operands[2])
16309 && CONST_INT_P (operands[3]))
16311 rtx out = operands[0];
16312 HOST_WIDE_INT ct = INTVAL (operands[2]);
16313 HOST_WIDE_INT cf = INTVAL (operands[3]);
16314 HOST_WIDE_INT diff;
16317 /* Sign bit compares are better done using shifts than by using sbb. */
16319 if (sign_bit_compare_p
16320 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16321 ix86_compare_op1, &compare_op))
16323 /* Detect overlap between destination and compare sources. */
16326 if (!sign_bit_compare_p)
16329 bool fpcmp = false;
16331 compare_code = GET_CODE (compare_op);
16333 flags = XEXP (compare_op, 0);
16335 if (GET_MODE (flags) == CCFPmode
16336 || GET_MODE (flags) == CCFPUmode)
16340 = ix86_fp_compare_code_to_integer (compare_code);
16343 /* To simplify the rest of the code, restrict to the GEU case. */
16344 if (compare_code == LTU)
16346 HOST_WIDE_INT tmp = ct;
16349 compare_code = reverse_condition (compare_code);
16350 code = reverse_condition (code);
16355 PUT_CODE (compare_op,
16356 reverse_condition_maybe_unordered
16357 (GET_CODE (compare_op)));
16359 PUT_CODE (compare_op,
16360 reverse_condition (GET_CODE (compare_op)));
16364 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
16365 || reg_overlap_mentioned_p (out, ix86_compare_op1))
16366 tmp = gen_reg_rtx (mode);
16368 if (mode == DImode)
16369 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16371 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16372 flags, compare_op));
16376 if (code == GT || code == GE)
16377 code = reverse_condition (code);
16380 HOST_WIDE_INT tmp = ct;
16385 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
16386 ix86_compare_op1, VOIDmode, 0, -1);
16399 tmp = expand_simple_binop (mode, PLUS,
16401 copy_rtx (tmp), 1, OPTAB_DIRECT);
16412 tmp = expand_simple_binop (mode, IOR,
16414 copy_rtx (tmp), 1, OPTAB_DIRECT);
16416 else if (diff == -1 && ct)
16426 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16428 tmp = expand_simple_binop (mode, PLUS,
16429 copy_rtx (tmp), GEN_INT (cf),
16430 copy_rtx (tmp), 1, OPTAB_DIRECT);
16438 * andl cf - ct, dest
16448 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16451 tmp = expand_simple_binop (mode, AND,
16453 gen_int_mode (cf - ct, mode),
16454 copy_rtx (tmp), 1, OPTAB_DIRECT);
16456 tmp = expand_simple_binop (mode, PLUS,
16457 copy_rtx (tmp), GEN_INT (ct),
16458 copy_rtx (tmp), 1, OPTAB_DIRECT);
16461 if (!rtx_equal_p (tmp, out))
16462 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16464 return 1; /* DONE */
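/* A minimal C sketch (not from the sources above) of the sbb-based
   branchless select: the compare leaves its answer in the carry flag
   and "sbb %reg, %reg" materializes 0 or -1, which then masks the
   difference of the two constants.  Assumes 32-bit two's complement;
   sketch_sbb_select is hypothetical.  */
#if 0
#include <stdint.h>

static int32_t
sketch_sbb_select (uint32_t a, uint32_t b, int32_t ct, int32_t cf)
{
  int32_t mask = a < b ? -1 : 0;	/* cmp; sbb %eax, %eax.  */
  return (mask & (ct - cf)) + cf;	/* ct when a < b, else cf.  */
}
#endif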
16469 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16472 tmp = ct, ct = cf, cf = tmp;
16475 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16477 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16479 /* We may be reversing an unordered compare to a normal compare, which
16480 is not valid in general (we may convert a non-trapping condition
16481 to a trapping one); however, on i386 we currently emit all
16482 comparisons unordered. */
16483 compare_code = reverse_condition_maybe_unordered (compare_code);
16484 code = reverse_condition_maybe_unordered (code);
16488 compare_code = reverse_condition (compare_code);
16489 code = reverse_condition (code);
16493 compare_code = UNKNOWN;
16494 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
16495 && CONST_INT_P (ix86_compare_op1))
16497 if (ix86_compare_op1 == const0_rtx
16498 && (code == LT || code == GE))
16499 compare_code = code;
16500 else if (ix86_compare_op1 == constm1_rtx)
16504 else if (code == GT)
16509 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16510 if (compare_code != UNKNOWN
16511 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
16512 && (cf == -1 || ct == -1))
16514 /* If lea code below could be used, only optimize
16515 if it results in a 2 insn sequence. */
16517 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16518 || diff == 3 || diff == 5 || diff == 9)
16519 || (compare_code == LT && ct == -1)
16520 || (compare_code == GE && cf == -1))
16523 * notl op1 (if necessary)
16531 code = reverse_condition (code);
16534 out = emit_store_flag (out, code, ix86_compare_op0,
16535 ix86_compare_op1, VOIDmode, 0, -1);
16537 out = expand_simple_binop (mode, IOR,
16539 out, 1, OPTAB_DIRECT);
16540 if (out != operands[0])
16541 emit_move_insn (operands[0], out);
16543 return 1; /* DONE */
16548 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16549 || diff == 3 || diff == 5 || diff == 9)
16550 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16552 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16558 * lea cf(dest*(ct-cf)),dest
16562 * This also catches the degenerate setcc-only case.
16568 out = emit_store_flag (out, code, ix86_compare_op0,
16569 ix86_compare_op1, VOIDmode, 0, 1);
16572 /* On x86_64 the lea instruction operates on Pmode, so we need
16573 to get the arithmetic done in the proper mode to match. */
16575 tmp = copy_rtx (out);
16579 out1 = copy_rtx (out);
16580 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16584 tmp = gen_rtx_PLUS (mode, tmp, out1);
16590 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16593 if (!rtx_equal_p (tmp, out))
16596 out = force_operand (tmp, copy_rtx (out));
16598 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16600 if (!rtx_equal_p (out, operands[0]))
16601 emit_move_insn (operands[0], copy_rtx (out));
16603 return 1; /* DONE */
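/* A minimal C sketch (not from the sources above) of the setcc+lea
   sequence: setcc produces 0 or 1, and when ct - cf is an
   lea-encodable scale the whole select costs two instructions.
   sketch_lea_select is hypothetical.  */
#if 0
#include <stdint.h>

static int32_t
sketch_lea_select (uint32_t a, uint32_t b, int32_t ct, int32_t cf)
{
  int32_t bit = a < b;			/* setcc: 0 or 1.  */
  return bit * (ct - cf) + cf;		/* one lea for scales 1,2,3,4,5,8,9.  */
}
#endif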
16607 * General case: Jumpful:
16608 * xorl dest,dest cmpl op1, op2
16609 * cmpl op1, op2 movl ct, dest
16610 * setcc dest jcc 1f
16611 * decl dest movl cf, dest
16612 * andl (cf-ct),dest 1:
16615 * Size 20. Size 14.
16617 * This is reasonably steep, but branch mispredict costs are
16618 * high on modern cpus, so consider failing only if optimizing for space. */
16622 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16623 && BRANCH_COST (optimize_insn_for_speed_p (),
16628 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16633 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16635 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16637 /* We may be reversing an unordered compare to a normal compare,
16638 which is not valid in general (we may convert a non-trapping
16639 condition to a trapping one); however, on i386 we currently
16640 emit all comparisons unordered. */
16641 code = reverse_condition_maybe_unordered (code);
16645 code = reverse_condition (code);
16646 if (compare_code != UNKNOWN)
16647 compare_code = reverse_condition (compare_code);
16651 if (compare_code != UNKNOWN)
16653 /* notl op1 (if needed)
16658 For x < 0 (resp. x <= -1) there will be no notl,
16659 so if possible swap the constants to get rid of the complement.
16661 True/false will be -1/0 while code below (store flag
16662 followed by decrement) is 0/-1, so the constants need
16663 to be exchanged once more. */
16665 if (compare_code == GE || !cf)
16667 code = reverse_condition (code);
16672 HOST_WIDE_INT tmp = cf;
16677 out = emit_store_flag (out, code, ix86_compare_op0,
16678 ix86_compare_op1, VOIDmode, 0, -1);
16682 out = emit_store_flag (out, code, ix86_compare_op0,
16683 ix86_compare_op1, VOIDmode, 0, 1);
16685 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
16686 copy_rtx (out), 1, OPTAB_DIRECT);
16689 out = expand_simple_binop (mode, AND, copy_rtx (out),
16690 gen_int_mode (cf - ct, mode),
16691 copy_rtx (out), 1, OPTAB_DIRECT);
16693 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16694 copy_rtx (out), 1, OPTAB_DIRECT);
16695 if (!rtx_equal_p (out, operands[0]))
16696 emit_move_insn (operands[0], copy_rtx (out));
16698 return 1; /* DONE */
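/* The jumpless general-case sequence above written out in C (not from
   the sources above), assuming 32-bit two's complement;
   sketch_setcc_dec_and is hypothetical.  */
#if 0
#include <stdint.h>

static int32_t
sketch_setcc_dec_and (uint32_t a, uint32_t b, int32_t ct, int32_t cf)
{
  int32_t out = a < b;		/* xorl + setcc: 0 or 1.  */
  out -= 1;			/* decl: 0 when true, -1 when false.  */
  out &= cf - ct;		/* andl: 0 or cf - ct.  */
  return out + ct;		/* addl: ct or cf.  */
}
#endif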
16702 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16704 /* Try a few more things with specific constants and a variable. */
16707 rtx var, orig_out, out, tmp;
16709 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16710 return 0; /* FAIL */
16712 /* If one of the two operands is an interesting constant, load a
16713 constant with the above and mask it in with a logical operation. */
16715 if (CONST_INT_P (operands[2]))
16718 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16719 operands[3] = constm1_rtx, op = and_optab;
16720 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16721 operands[3] = const0_rtx, op = ior_optab;
16723 return 0; /* FAIL */
16725 else if (CONST_INT_P (operands[3]))
16728 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16729 operands[2] = constm1_rtx, op = and_optab;
16730 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16731 operands[2] = const0_rtx, op = ior_optab;
16733 return 0; /* FAIL */
16736 return 0; /* FAIL */
16738 orig_out = operands[0];
16739 tmp = gen_reg_rtx (mode);
16742 /* Recurse to get the constant loaded. */
16743 if (ix86_expand_int_movcc (operands) == 0)
16744 return 0; /* FAIL */
16746 /* Mask in the interesting variable. */
16747 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16749 if (!rtx_equal_p (out, orig_out))
16750 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16752 return 1; /* DONE */
16756 * For comparison with above,
16766 if (! nonimmediate_operand (operands[2], mode))
16767 operands[2] = force_reg (mode, operands[2]);
16768 if (! nonimmediate_operand (operands[3], mode))
16769 operands[3] = force_reg (mode, operands[3]);
16771 if (! register_operand (operands[2], VOIDmode)
16773 || ! register_operand (operands[3], VOIDmode)))
16774 operands[2] = force_reg (mode, operands[2]);
16777 && ! register_operand (operands[3], VOIDmode))
16778 operands[3] = force_reg (mode, operands[3]);
16780 emit_insn (compare_seq);
16781 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16782 gen_rtx_IF_THEN_ELSE (mode,
16783 compare_op, operands[2],
16786 return 1; /* DONE */
16789 /* Swap, force into registers, or otherwise massage the two operands
16790 to an sse comparison with a mask result. Thus we differ a bit from
16791 ix86_prepare_fp_compare_args which expects to produce a flags result.
16793 The DEST operand exists to help determine whether to commute commutative
16794 operators. The POP0/POP1 operands are updated in place. The new
16795 comparison code is returned, or UNKNOWN if not implementable. */
16797 static enum rtx_code
16798 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16799 rtx *pop0, rtx *pop1)
16807 /* We have no LTGT as an operator. We could implement it with
16808 NE & ORDERED, but this requires an extra temporary. It's
16809 not clear that it's worth it. */
16816 /* These are supported directly. */
16823 /* For commutative operators, try to canonicalize the destination
16824 operand to be first in the comparison - this helps reload to
16825 avoid extra moves. */
16826 if (!dest || !rtx_equal_p (dest, *pop1))
16834 /* These are not supported directly. Swap the comparison operands
16835 to transform into something that is supported. */
16839 code = swap_condition (code);
16843 gcc_unreachable ();
16849 /* Detect conditional moves that exactly match min/max operational
16850 semantics. Note that this is IEEE safe, as long as we don't
16851 interchange the operands.
16853 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16854 and TRUE if the operation is successful and instructions are emitted. */
16857 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16858 rtx cmp_op1, rtx if_true, rtx if_false)
16860 enum machine_mode mode;
16866 else if (code == UNGE)
16869 if_true = if_false;
16875 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16877 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16882 mode = GET_MODE (dest);
16884 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16885 but MODE may be a vector mode and thus not appropriate. */
16886 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16888 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16891 if_true = force_reg (mode, if_true);
16892 v = gen_rtvec (2, if_true, if_false);
16893 tmp = gen_rtx_UNSPEC (mode, v, u);
16897 code = is_min ? SMIN : SMAX;
16898 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16901 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
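/* A C model (not from the sources above) of why the operand order
   matters for SSE min/max: MINSS answers a single "<" compare and
   yields the second operand on every false or unordered outcome,
   including NaNs and min(-0.0, +0.0).  sketch_minss is hypothetical.  */
#if 0
static float
sketch_minss (float src1, float src2)
{
  return src1 < src2 ? src1 : src2;	/* NaN in either operand -> src2.  */
}
#endif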
16905 /* Expand an sse vector comparison. Return the register with the result. */
16908 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16909 rtx op_true, rtx op_false)
16911 enum machine_mode mode = GET_MODE (dest);
16914 cmp_op0 = force_reg (mode, cmp_op0);
16915 if (!nonimmediate_operand (cmp_op1, mode))
16916 cmp_op1 = force_reg (mode, cmp_op1);
16919 || reg_overlap_mentioned_p (dest, op_true)
16920 || reg_overlap_mentioned_p (dest, op_false))
16921 dest = gen_reg_rtx (mode);
16923 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16924 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16929 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16930 operations. This is used for both scalar and vector conditional moves. */
16933 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16935 enum machine_mode mode = GET_MODE (dest);
16938 if (op_false == CONST0_RTX (mode))
16940 op_true = force_reg (mode, op_true);
16941 x = gen_rtx_AND (mode, cmp, op_true);
16942 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16944 else if (op_true == CONST0_RTX (mode))
16946 op_false = force_reg (mode, op_false);
16947 x = gen_rtx_NOT (mode, cmp);
16948 x = gen_rtx_AND (mode, x, op_false);
16949 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16951 else if (TARGET_XOP)
16953 rtx pcmov = gen_rtx_SET (mode, dest,
16954 gen_rtx_IF_THEN_ELSE (mode, cmp,
16961 op_true = force_reg (mode, op_true);
16962 op_false = force_reg (mode, op_false);
16964 t2 = gen_reg_rtx (mode);
16966 t3 = gen_reg_rtx (mode);
16970 x = gen_rtx_AND (mode, op_true, cmp);
16971 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16973 x = gen_rtx_NOT (mode, cmp);
16974 x = gen_rtx_AND (mode, x, op_false);
16975 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16977 x = gen_rtx_IOR (mode, t3, t2);
16978 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
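/* A scalar C sketch (not from the sources above) of the AND/ANDN/IOR
   blend just emitted, assuming CMP is an all-zeros or all-ones mask
   per element; sketch_mask_blend is hypothetical.  */
#if 0
#include <stdint.h>

static uint32_t
sketch_mask_blend (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  uint32_t t2 = op_true & cmp;		/* AND.  */
  uint32_t t3 = op_false & ~cmp;	/* ANDN.  */
  return t2 | t3;			/* IOR.  */
}
#endif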
16982 /* Expand a floating-point conditional move. Return true if successful. */
16985 ix86_expand_fp_movcc (rtx operands[])
16987 enum machine_mode mode = GET_MODE (operands[0]);
16988 enum rtx_code code = GET_CODE (operands[1]);
16989 rtx tmp, compare_op;
16991 ix86_compare_op0 = XEXP (operands[1], 0);
16992 ix86_compare_op1 = XEXP (operands[1], 1);
16993 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16995 enum machine_mode cmode;
16997 /* Since we've no cmove for sse registers, don't force bad register
16998 allocation just to gain access to it. Deny movcc when the
16999 comparison mode doesn't match the move mode. */
17000 cmode = GET_MODE (ix86_compare_op0);
17001 if (cmode == VOIDmode)
17002 cmode = GET_MODE (ix86_compare_op1);
17006 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17008 &ix86_compare_op1);
17009 if (code == UNKNOWN)
17012 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
17013 ix86_compare_op1, operands[2],
17017 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
17018 ix86_compare_op1, operands[2], operands[3]);
17019 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17023 /* The floating point conditional move instructions don't directly
17024 support conditions resulting from a signed integer comparison. */
17026 compare_op = ix86_expand_compare (code);
17027 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17029 tmp = gen_reg_rtx (QImode);
17030 ix86_expand_setcc (code, tmp);
17032 ix86_compare_op0 = tmp;
17033 ix86_compare_op1 = const0_rtx;
17034 compare_op = ix86_expand_compare (code);
17037 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17038 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17039 operands[2], operands[3])));
17044 /* Expand a floating-point vector conditional move; a vcond operation
17045 rather than a movcc operation. */
17048 ix86_expand_fp_vcond (rtx operands[])
17050 enum rtx_code code = GET_CODE (operands[3]);
17053 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17054 &operands[4], &operands[5]);
17055 if (code == UNKNOWN)
17058 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17059 operands[5], operands[1], operands[2]))
17062 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17063 operands[1], operands[2]);
17064 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17068 /* Expand a signed/unsigned integral vector conditional move. */
17071 ix86_expand_int_vcond (rtx operands[])
17073 enum machine_mode mode = GET_MODE (operands[0]);
17074 enum rtx_code code = GET_CODE (operands[3]);
17075 bool negate = false;
17078 cop0 = operands[4];
17079 cop1 = operands[5];
17081 /* XOP supports all of the comparisons on all vector int types. */
17084 /* Canonicalize the comparison to EQ, GT, GTU. */
17095 code = reverse_condition (code);
17101 code = reverse_condition (code);
17107 code = swap_condition (code);
17108 x = cop0, cop0 = cop1, cop1 = x;
17112 gcc_unreachable ();
17115 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17116 if (mode == V2DImode)
17121 /* SSE4.1 supports EQ. */
17122 if (!TARGET_SSE4_1)
17128 /* SSE4.2 supports GT/GTU. */
17129 if (!TARGET_SSE4_2)
17134 gcc_unreachable ();
17138 /* Unsigned parallel compare is not supported by the hardware.
17139 Play some tricks to turn this into a signed comparison against 0. */
17143 cop0 = force_reg (mode, cop0);
17151 rtx (*gen_sub3) (rtx, rtx, rtx);
17153 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17155 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17157 gen_sub3 = (mode == V4SImode
17158 ? gen_subv4si3 : gen_subv2di3);
17159 t1 = gen_reg_rtx (mode);
17160 emit_insn (gen_sub3 (t1, cop0, mask));
17162 t2 = gen_reg_rtx (mode);
17163 emit_insn (gen_sub3 (t2, cop1, mask));
17173 /* Perform a parallel unsigned saturating subtraction. */
17174 x = gen_reg_rtx (mode);
17175 emit_insn (gen_rtx_SET (VOIDmode, x,
17176 gen_rtx_US_MINUS (mode, cop0, cop1)));
17179 cop1 = CONST0_RTX (mode);
17185 gcc_unreachable ();
17190 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17191 operands[1+negate], operands[2-negate]);
17193 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17194 operands[2-negate]);
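/* A C sketch (not from the sources above) of the bias trick used
   above: flipping (or equivalently subtracting) the sign bit in both
   operands turns an unsigned comparison into a signed one.  Assumes
   32-bit lanes; sketch_unsigned_gt is hypothetical.  */
#if 0
#include <stdint.h>

static int
sketch_unsigned_gt (uint32_t a, uint32_t b)
{
  int32_t sa = (int32_t) (a ^ 0x80000000u);
  int32_t sb = (int32_t) (b ^ 0x80000000u);
  return sa > sb;		/* Equal to a > b on unsigned values.  */
}
#endif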
17198 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17199 true if we should do zero extension, else sign extension. HIGH_P is
17200 true if we want the N/2 high elements, else the low elements. */
17203 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17205 enum machine_mode imode = GET_MODE (operands[1]);
17206 rtx (*unpack)(rtx, rtx, rtx);
17213 unpack = gen_vec_interleave_highv16qi;
17215 unpack = gen_vec_interleave_lowv16qi;
17219 unpack = gen_vec_interleave_highv8hi;
17221 unpack = gen_vec_interleave_lowv8hi;
17225 unpack = gen_vec_interleave_highv4si;
17227 unpack = gen_vec_interleave_lowv4si;
17230 gcc_unreachable ();
17233 dest = gen_lowpart (imode, operands[0]);
17236 se = force_reg (imode, CONST0_RTX (imode));
17238 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17239 operands[1], pc_rtx, pc_rtx);
17241 emit_insn (unpack (dest, operands[1], se));
17244 /* This function performs the same task as ix86_expand_sse_unpack,
17245 but with SSE4.1 instructions. */
17248 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17250 enum machine_mode imode = GET_MODE (operands[1]);
17251 rtx (*unpack)(rtx, rtx);
17258 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17260 unpack = gen_sse4_1_extendv8qiv8hi2;
17264 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17266 unpack = gen_sse4_1_extendv4hiv4si2;
17270 unpack = gen_sse4_1_zero_extendv2siv2di2;
17272 unpack = gen_sse4_1_extendv2siv2di2;
17275 gcc_unreachable ();
17278 dest = operands[0];
17281 /* Shift higher 8 bytes to lower 8 bytes. */
17282 src = gen_reg_rtx (imode);
17283 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17284 gen_lowpart (V1TImode, operands[1]),
17290 emit_insn (unpack (dest, src));
17293 /* Expand conditional increment or decrement using adc/sbb instructions.
17294 The default case using setcc followed by the conditional move can be
17295 done by generic code. */
17297 ix86_expand_int_addcc (rtx operands[])
17299 enum rtx_code code = GET_CODE (operands[1]);
17301 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17303 rtx val = const0_rtx;
17304 bool fpcmp = false;
17305 enum machine_mode mode;
17307 ix86_compare_op0 = XEXP (operands[1], 0);
17308 ix86_compare_op1 = XEXP (operands[1], 1);
17309 if (operands[3] != const1_rtx
17310 && operands[3] != constm1_rtx)
17312 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
17313 ix86_compare_op1, &compare_op))
17315 code = GET_CODE (compare_op);
17317 flags = XEXP (compare_op, 0);
17319 if (GET_MODE (flags) == CCFPmode
17320 || GET_MODE (flags) == CCFPUmode)
17323 code = ix86_fp_compare_code_to_integer (code);
17330 PUT_CODE (compare_op,
17331 reverse_condition_maybe_unordered
17332 (GET_CODE (compare_op)));
17334 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17337 mode = GET_MODE (operands[0]);
17339 /* Construct either adc or sbb insn. */
17340 if ((code == LTU) == (operands[3] == constm1_rtx))
17345 insn = gen_subqi3_carry;
17348 insn = gen_subhi3_carry;
17351 insn = gen_subsi3_carry;
17354 insn = gen_subdi3_carry;
17357 gcc_unreachable ();
17365 insn = gen_addqi3_carry;
17368 insn = gen_addhi3_carry;
17371 insn = gen_addsi3_carry;
17374 insn = gen_adddi3_carry;
17377 gcc_unreachable ();
17380 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17382 return 1; /* DONE */
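/* A minimal C sketch (not from the sources above) of the adc/sbb
   conditional increment and decrement: once the compare answers in
   the carry flag, "adc x, 0" adds the carry itself and "sbb x, 0"
   subtracts it.  sketch_addcc and sketch_subcc are hypothetical.  */
#if 0
#include <stdint.h>

static uint32_t
sketch_addcc (uint32_t x, uint32_t a, uint32_t b)
{
  return x + (a < b);		/* cmp a, b; adc x, 0.  */
}

static uint32_t
sketch_subcc (uint32_t x, uint32_t a, uint32_t b)
{
  return x - (a < b);		/* cmp a, b; sbb x, 0.  */
}
#endif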
17386 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17387 works for floating point parameters and nonoffsettable memories.
17388 For pushes, it returns just stack offsets; the values will be saved
17389 in the right order. At most four parts are generated. */
17392 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17397 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17399 size = (GET_MODE_SIZE (mode) + 4) / 8;
17401 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17402 gcc_assert (size >= 2 && size <= 4);
17404 /* Optimize constant pool references to immediates. This is used by fp
17405 moves, which force all constants to memory to allow combining. */
17406 if (MEM_P (operand) && MEM_READONLY_P (operand))
17408 rtx tmp = maybe_get_pool_constant (operand);
17413 if (MEM_P (operand) && !offsettable_memref_p (operand))
17415 /* The only non-offsettable memories we handle are pushes. */
17416 int ok = push_operand (operand, VOIDmode);
17420 operand = copy_rtx (operand);
17421 PUT_MODE (operand, Pmode);
17422 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17426 if (GET_CODE (operand) == CONST_VECTOR)
17428 enum machine_mode imode = int_mode_for_mode (mode);
17429 /* Caution: if we looked through a constant pool memory above,
17430 the operand may actually have a different mode now. That's
17431 ok, since we want to pun this all the way back to an integer. */
17432 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17433 gcc_assert (operand != NULL);
17439 if (mode == DImode)
17440 split_di (&operand, 1, &parts[0], &parts[1]);
17445 if (REG_P (operand))
17447 gcc_assert (reload_completed);
17448 for (i = 0; i < size; i++)
17449 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17451 else if (offsettable_memref_p (operand))
17453 operand = adjust_address (operand, SImode, 0);
17454 parts[0] = operand;
17455 for (i = 1; i < size; i++)
17456 parts[i] = adjust_address (operand, SImode, 4 * i);
17458 else if (GET_CODE (operand) == CONST_DOUBLE)
17463 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17467 real_to_target (l, &r, mode);
17468 parts[3] = gen_int_mode (l[3], SImode);
17469 parts[2] = gen_int_mode (l[2], SImode);
17472 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17473 parts[2] = gen_int_mode (l[2], SImode);
17476 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17479 gcc_unreachable ();
17481 parts[1] = gen_int_mode (l[1], SImode);
17482 parts[0] = gen_int_mode (l[0], SImode);
17485 gcc_unreachable ();
17490 if (mode == TImode)
17491 split_ti (&operand, 1, &parts[0], &parts[1]);
17492 if (mode == XFmode || mode == TFmode)
17494 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17495 if (REG_P (operand))
17497 gcc_assert (reload_completed);
17498 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17499 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17501 else if (offsettable_memref_p (operand))
17503 operand = adjust_address (operand, DImode, 0);
17504 parts[0] = operand;
17505 parts[1] = adjust_address (operand, upper_mode, 8);
17507 else if (GET_CODE (operand) == CONST_DOUBLE)
17512 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17513 real_to_target (l, &r, mode);
17515 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17516 if (HOST_BITS_PER_WIDE_INT >= 64)
17519 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17520 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17523 parts[0] = immed_double_const (l[0], l[1], DImode);
17525 if (upper_mode == SImode)
17526 parts[1] = gen_int_mode (l[2], SImode);
17527 else if (HOST_BITS_PER_WIDE_INT >= 64)
17530 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17531 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17534 parts[1] = immed_double_const (l[2], l[3], DImode);
17537 gcc_unreachable ();
17544 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17545 Return false when normal moves are needed; true when all required
17546 insns have been emitted. Operands 2-5 contain the destination parts
17547 in the correct order; operands 6-9 contain the source parts. */
17550 ix86_split_long_move (rtx operands[])
17555 int collisions = 0;
17556 enum machine_mode mode = GET_MODE (operands[0]);
17557 bool collisionparts[4];
17559 /* The DFmode expanders may ask us to move double.
17560 For a 64-bit target this is a single move. By hiding the fact
17561 here we simplify the i386.md splitters. */
17562 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17564 /* Optimize constant pool references to immediates. This is used by
17565 fp moves, which force all constants to memory to allow combining. */
17567 if (MEM_P (operands[1])
17568 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17569 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17570 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17571 if (push_operand (operands[0], VOIDmode))
17573 operands[0] = copy_rtx (operands[0]);
17574 PUT_MODE (operands[0], Pmode);
17577 operands[0] = gen_lowpart (DImode, operands[0]);
17578 operands[1] = gen_lowpart (DImode, operands[1]);
17579 emit_move_insn (operands[0], operands[1]);
17583 /* The only non-offsettable memory we handle is push. */
17584 if (push_operand (operands[0], VOIDmode))
17587 gcc_assert (!MEM_P (operands[0])
17588 || offsettable_memref_p (operands[0]));
17590 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17591 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17593 /* When emitting a push, take care with source operands on the stack. */
17594 if (push && MEM_P (operands[1])
17595 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17597 rtx src_base = XEXP (part[1][nparts - 1], 0);
17599 /* Compensate for the stack decrement by 4. */
17600 if (!TARGET_64BIT && nparts == 3
17601 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17602 src_base = plus_constant (src_base, 4);
17604 /* src_base refers to the stack pointer and is
17605 automatically decreased by the emitted pushes. */
17606 for (i = 0; i < nparts; i++)
17607 part[1][i] = change_address (part[1][i],
17608 GET_MODE (part[1][i]), src_base);
17611 /* We need to do the copy in the right order in case an address register
17612 of the source overlaps the destination. */
17613 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17617 for (i = 0; i < nparts; i++)
17620 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17621 if (collisionparts[i])
17625 /* Collision in the middle part can be handled by reordering. */
17626 if (collisions == 1 && nparts == 3 && collisionparts [1])
17628 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17629 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17631 else if (collisions == 1
17633 && (collisionparts [1] || collisionparts [2]))
17635 if (collisionparts [1])
17637 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17638 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17642 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17643 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17647 /* If there are more collisions, we can't handle it by reordering.
17648 Do an lea to the last part and use only one colliding move. */
17649 else if (collisions > 1)
17655 base = part[0][nparts - 1];
17657 /* Handle the case when the last part isn't valid for lea.
17658 This happens in 64-bit mode when storing the 12-byte XFmode. */
17659 if (GET_MODE (base) != Pmode)
17660 base = gen_rtx_REG (Pmode, REGNO (base));
17662 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17663 part[1][0] = replace_equiv_address (part[1][0], base);
17664 for (i = 1; i < nparts; i++)
17666 tmp = plus_constant (base, UNITS_PER_WORD * i);
17667 part[1][i] = replace_equiv_address (part[1][i], tmp);
17678 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17679 emit_insn (gen_addsi3 (stack_pointer_rtx,
17680 stack_pointer_rtx, GEN_INT (-4)));
17681 emit_move_insn (part[0][2], part[1][2]);
17683 else if (nparts == 4)
17685 emit_move_insn (part[0][3], part[1][3]);
17686 emit_move_insn (part[0][2], part[1][2]);
17691 /* In 64-bit mode we don't have a 32-bit push available. If this is a
17692 register, that is OK - we will just use the larger counterpart. We also
17693 retype memory - this comes from an attempt to avoid the REX prefix on
17694 moving the second half of a TFmode value. */
17695 if (GET_MODE (part[1][1]) == SImode)
17697 switch (GET_CODE (part[1][1]))
17700 part[1][1] = adjust_address (part[1][1], DImode, 0);
17704 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17708 gcc_unreachable ();
17711 if (GET_MODE (part[1][0]) == SImode)
17712 part[1][0] = part[1][1];
17715 emit_move_insn (part[0][1], part[1][1]);
17716 emit_move_insn (part[0][0], part[1][0]);
17720 /* Choose the correct order so as not to overwrite the source before it is copied. */
17721 if ((REG_P (part[0][0])
17722 && REG_P (part[1][1])
17723 && (REGNO (part[0][0]) == REGNO (part[1][1])
17725 && REGNO (part[0][0]) == REGNO (part[1][2]))
17727 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17729 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17731 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17733 operands[2 + i] = part[0][j];
17734 operands[6 + i] = part[1][j];
17739 for (i = 0; i < nparts; i++)
17741 operands[2 + i] = part[0][i];
17742 operands[6 + i] = part[1][i];
17746 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17747 if (optimize_insn_for_size_p ())
17749 for (j = 0; j < nparts - 1; j++)
17750 if (CONST_INT_P (operands[6 + j])
17751 && operands[6 + j] != const0_rtx
17752 && REG_P (operands[2 + j]))
17753 for (i = j; i < nparts - 1; i++)
17754 if (CONST_INT_P (operands[7 + i])
17755 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17756 operands[7 + i] = operands[2 + j];
17759 for (i = 0; i < nparts; i++)
17760 emit_move_insn (operands[2 + i], operands[6 + i]);
17765 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17766 left shift by a constant, either using a single shift or
17767 a sequence of add instructions. */
17770 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17774 emit_insn ((mode == DImode
17776 : gen_adddi3) (operand, operand, operand));
17778 else if (!optimize_insn_for_size_p ()
17779 && count * ix86_cost->add <= ix86_cost->shift_const)
17782 for (i = 0; i < count; i++)
17784 emit_insn ((mode == DImode
17786 : gen_adddi3) (operand, operand, operand));
17790 emit_insn ((mode == DImode
17792 : gen_ashldi3) (operand, operand, GEN_INT (count)));
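/* A C sketch (not from the sources above) of the add-chain
   alternative: each self-addition doubles the operand, so COUNT
   additions equal a left shift by COUNT when adds are cheaper than a
   shift-by-constant.  sketch_ashl_by_adds is hypothetical.  */
#if 0
#include <stdint.h>

static uint32_t
sketch_ashl_by_adds (uint32_t x, int count)
{
  int i;
  for (i = 0; i < count; i++)
    x += x;			/* addl %eax, %eax acts as shll $1.  */
  return x;
}
#endif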
17796 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17798 rtx low[2], high[2];
17800 const int single_width = mode == DImode ? 32 : 64;
17802 if (CONST_INT_P (operands[2]))
17804 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17805 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17807 if (count >= single_width)
17809 emit_move_insn (high[0], low[1]);
17810 emit_move_insn (low[0], const0_rtx);
17812 if (count > single_width)
17813 ix86_expand_ashl_const (high[0], count - single_width, mode);
17817 if (!rtx_equal_p (operands[0], operands[1]))
17818 emit_move_insn (operands[0], operands[1]);
17819 emit_insn ((mode == DImode
17821 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17822 ix86_expand_ashl_const (low[0], count, mode);
17827 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17829 if (operands[1] == const1_rtx)
17831 /* Assuming we've chosen QImode-capable registers, 1 << N
17832 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17833 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17835 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17837 ix86_expand_clear (low[0]);
17838 ix86_expand_clear (high[0]);
17839 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17841 d = gen_lowpart (QImode, low[0]);
17842 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17843 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17844 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17846 d = gen_lowpart (QImode, high[0]);
17847 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17848 s = gen_rtx_NE (QImode, flags, const0_rtx);
17849 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17852 /* Otherwise, we can get the same results by manually performing
17853 a bit extract operation on bit 5/6, and then performing the two
17854 shifts. The two methods of getting 0/1 into low/high are exactly
17855 the same size. Avoiding the shift in the bit extract case helps
17856 pentium4 a bit; no one else seems to care much either way. */
17861 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17862 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17864 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17865 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17867 emit_insn ((mode == DImode
17869 : gen_lshrdi3) (high[0], high[0],
17870 GEN_INT (mode == DImode ? 5 : 6)));
17871 emit_insn ((mode == DImode
17873 : gen_anddi3) (high[0], high[0], const1_rtx));
17874 emit_move_insn (low[0], high[0]);
emit_insn ((mode == DImode
            ? gen_xorsi3
            : gen_xordi3) (low[0], low[0], const1_rtx));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (high[0], high[0], operands[2]));
17889 if (operands[1] == constm1_rtx)
17891 /* For -1 << N, we can avoid the shld instruction, because we
17892 know that we're shifting 0...31/63 ones into a -1. */
17893 emit_move_insn (low[0], constm1_rtx);
17894 if (optimize_insn_for_size_p ())
17895 emit_move_insn (high[0], low[0]);
17897 emit_move_insn (high[0], constm1_rtx);
17901 if (!rtx_equal_p (operands[0], operands[1]))
17902 emit_move_insn (operands[0], operands[1]);
17904 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shld
            : gen_x86_64_shld) (high[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
17914 if (TARGET_CMOVE && scratch)
17916 ix86_expand_clear (scratch);
emit_insn ((mode == DImode
            ? gen_x86_shiftsi_adj_1
            : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
                                      scratch));
17923 emit_insn ((mode == DImode
17924 ? gen_x86_shiftsi_adj_2
17925 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
17929 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17931 rtx low[2], high[2];
17933 const int single_width = mode == DImode ? 32 : 64;
17935 if (CONST_INT_P (operands[2]))
17937 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17938 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17940 if (count == single_width * 2 - 1)
17942 emit_move_insn (high[0], high[1]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
17947 emit_move_insn (low[0], high[0]);
17950 else if (count >= single_width)
17952 emit_move_insn (low[0], high[1]);
17953 emit_move_insn (high[0], low[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
17958 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
17966 if (!rtx_equal_p (operands[0], operands[1]))
17967 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17978 if (!rtx_equal_p (operands[0], operands[1]))
17979 emit_move_insn (operands[0], operands[1]);
17981 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], operands[2]));
17990 if (TARGET_CMOVE && scratch)
17992 emit_move_insn (scratch, high[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (scratch, scratch,
                            GEN_INT (single_width - 1)));
emit_insn ((mode == DImode
            ? gen_x86_shiftsi_adj_1
            : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
                                      scratch));
18003 emit_insn ((mode == DImode
18004 ? gen_x86_shiftsi_adj_3
18005 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
18010 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18012 rtx low[2], high[2];
18014 const int single_width = mode == DImode ? 32 : 64;
18016 if (CONST_INT_P (operands[2]))
18018 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18019 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18021 if (count >= single_width)
18023 emit_move_insn (low[0], high[1]);
18024 ix86_expand_clear (high[0]);
18026 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
18034 if (!rtx_equal_p (operands[0], operands[1]))
18035 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18046 if (!rtx_equal_p (operands[0], operands[1]))
18047 emit_move_insn (operands[0], operands[1]);
18049 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], operands[2]));
18058 /* Heh. By reversing the arguments, we can reuse this pattern. */
18059 if (TARGET_CMOVE && scratch)
18061 ix86_expand_clear (scratch);
emit_insn ((mode == DImode
            ? gen_x86_shiftsi_adj_1
            : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
                                      scratch));
18068 emit_insn ((mode == DImode
18069 ? gen_x86_shiftsi_adj_2
18070 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
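/* Illustrative sketch (plain C, not part of the backend): the constant-count
   double-word left shift that ix86_split_ashl above open-codes for a 64-bit
   value kept in two 32-bit halves.  The combined-shift step corresponds to
   the shld pattern.  */
static void
dword_shl_sketch (unsigned int *lo, unsigned int *hi, int count)
{
  count &= 63;                  /* shift counts are taken modulo 2 * 32 */
  if (count >= 32)
    {
      *hi = *lo << (count - 32);        /* high half receives the low half */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));     /* shld */
      *lo <<= count;
    }
}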
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
18076 predict_jump (int prob)
18078 rtx insn = get_last_insn ();
18079 gcc_assert (JUMP_P (insn));
18080 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes.  If it is, jump to the returned label.  */
18086 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18088 rtx label = gen_label_rtx ();
18089 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18090 if (GET_MODE (variable) == DImode)
18091 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18093 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18094 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18097 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18099 predict_jump (REG_BR_PROB_BASE * 90 / 100);
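/* Plain-C rendering of the test emitted above (illustrative only): the
   branch to the returned label is taken when VARIABLE has none of the
   bits of VALUE set, i.e. when it is suitably aligned.  */
static int
aligntest_sketch (unsigned long variable, int value)
{
  return (variable & value) == 0;
}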
/* Decrease the counter COUNTREG by the constant VALUE.  */
18105 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18107 if (GET_MODE (countreg) == DImode)
18108 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18110 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend EXP, which may be SImode, into a Pmode register.  */
18115 ix86_zero_extend_to_Pmode (rtx exp)
18118 if (GET_MODE (exp) == VOIDmode)
18119 return force_reg (Pmode, exp);
18120 if (GET_MODE (exp) == Pmode)
18121 return copy_to_mode_reg (Pmode, exp);
18122 r = gen_reg_rtx (Pmode);
18123 emit_insn (gen_zero_extendsidi2 (r, exp));
18127 /* Divide COUNTREG by SCALE. */
18129 scale_counter (rtx countreg, int scale)
18135 if (CONST_INT_P (countreg))
18136 return GEN_INT (INTVAL (countreg) / scale);
18137 gcc_assert (REG_P (countreg));
18139 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18140 GEN_INT (exact_log2 (scale)),
18141 NULL, 1, OPTAB_DIRECT);
18145 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18146 DImode for constant loop counts. */
18148 static enum machine_mode
18149 counter_mode (rtx count_exp)
18151 if (GET_MODE (count_exp) != VOIDmode)
18152 return GET_MODE (count_exp);
18153 if (!CONST_INT_P (count_exp))
18155 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop moving memory from SRCPTR
   to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall size is
   COUNT, specified in bytes.  When SRCPTR is NULL, output the equivalent
   loop setting memory to VALUE (expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info.  */
18170 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18171 rtx destptr, rtx srcptr, rtx value,
18172 rtx count, enum machine_mode mode, int unroll,
18175 rtx out_label, top_label, iter, tmp;
18176 enum machine_mode iter_mode = counter_mode (count);
18177 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18178 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18184 top_label = gen_label_rtx ();
18185 out_label = gen_label_rtx ();
18186 iter = gen_reg_rtx (iter_mode);
18188 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18189 NULL, 1, OPTAB_DIRECT);
18190 /* Those two should combine. */
18191 if (piece_size == const1_rtx)
18193 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18195 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18197 emit_move_insn (iter, const0_rtx);
18199 emit_label (top_label);
18201 tmp = convert_modes (Pmode, iter_mode, iter, true);
18202 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18203 destmem = change_address (destmem, mode, x_addr);
18207 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18208 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using four temporaries is overkill in 32-bit mode.  */
18213 if (!TARGET_64BIT && 0)
18215 for (i = 0; i < unroll; i++)
18220 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18222 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18224 emit_move_insn (destmem, srcmem);
18230 gcc_assert (unroll <= 4);
18231 for (i = 0; i < unroll; i++)
18233 tmpreg[i] = gen_reg_rtx (mode);
18237 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18239 emit_move_insn (tmpreg[i], srcmem);
18241 for (i = 0; i < unroll; i++)
18246 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18248 emit_move_insn (destmem, tmpreg[i]);
18253 for (i = 0; i < unroll; i++)
18257 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18258 emit_move_insn (destmem, value);
18261 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18262 true, OPTAB_LIB_WIDEN);
18264 emit_move_insn (iter, tmp);
18266 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18268 if (expected_size != -1)
18270 expected_size /= GET_MODE_SIZE (mode) * unroll;
18271 if (expected_size == 0)
18273 else if (expected_size > REG_BR_PROB_BASE)
18274 predict_jump (REG_BR_PROB_BASE - 1);
18276 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18279 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18280 iter = ix86_zero_extend_to_Pmode (iter);
18281 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18282 true, OPTAB_LIB_WIDEN);
18283 if (tmp != destptr)
18284 emit_move_insn (destptr, tmp);
18287 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18288 true, OPTAB_LIB_WIDEN);
18290 emit_move_insn (srcptr, tmp);
18292 emit_label (out_label);
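/* Illustrative sketch (plain C, not emitted RTL): the shape of the loop
   generated above for the copy case.  STEP stands for
   GET_MODE_SIZE (mode) * unroll and must be a power of two; the function
   name is hypothetical.  */
static void
movmem_loop_sketch (unsigned char *dst, const unsigned char *src,
                    unsigned long count, unsigned long step)
{
  unsigned long size = count & ~(step - 1);     /* piece_size_mask */
  unsigned long iter, i;
  for (iter = 0; iter < size; iter += step)
    for (i = 0; i < step; i++)                  /* the unrolled moves */
      dst[iter + i] = src[iter + i];
  /* DST and SRC pointers are then advanced by SIZE; the remaining
     COUNT - SIZE tail bytes are left to the epilogue.  */
}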
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
18298 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18299 rtx destptr, rtx srcptr,
18301 enum machine_mode mode)
/* If the size is known, it is shorter to use rep movs.  */
if (mode == QImode && CONST_INT_P (count)
    && !(INTVAL (count) & 3))
  mode = SImode;
18312 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18313 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18314 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18315 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18316 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18317 if (mode != QImode)
18319 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18320 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18321 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18322 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18323 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18324 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18328 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18329 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18331 if (CONST_INT_P (count))
18333 count = GEN_INT (INTVAL (count)
18334 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18335 destmem = shallow_copy_rtx (destmem);
18336 srcmem = shallow_copy_rtx (srcmem);
18337 set_mem_size (destmem, count);
18338 set_mem_size (srcmem, count);
18342 if (MEM_SIZE (destmem))
18343 set_mem_size (destmem, NULL_RTX);
18344 if (MEM_SIZE (srcmem))
18345 set_mem_size (srcmem, NULL_RTX);
18347 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
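/* What the "rep; mov" emitted above does, written as plain C (illustrative
   only): COUNTREG iterations, each moving one MODE-sized chunk and
   advancing both pointers as a side effect.  */
static void
rep_movs_sketch (unsigned char **dstp, const unsigned char **srcp,
                 unsigned long countreg, int chunk)
{
  while (countreg--)
    {
      int i;
      for (i = 0; i < chunk; i++)
        (*dstp)[i] = (*srcp)[i];
      *dstp += chunk;
      *srcp += chunk;
    }
}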
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
18354 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18355 rtx count, enum machine_mode mode,
18361 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18362 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18363 value = force_reg (mode, gen_lowpart (mode, value));
18364 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18365 if (mode != QImode)
18367 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18368 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18369 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18372 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18373 if (orig_value == const0_rtx && CONST_INT_P (count))
18375 count = GEN_INT (INTVAL (count)
18376 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18377 destmem = shallow_copy_rtx (destmem);
18378 set_mem_size (destmem, count);
18380 else if (MEM_SIZE (destmem))
18381 set_mem_size (destmem, NULL_RTX);
18382 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18386 emit_strmov (rtx destmem, rtx srcmem,
18387 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18389 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18390 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18391 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18394 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18396 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18397 rtx destptr, rtx srcptr, rtx count, int max_size)
18400 if (CONST_INT_P (count))
18402 HOST_WIDE_INT countval = INTVAL (count);
18405 if ((countval & 0x10) && max_size > 16)
18409 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18410 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18413 gcc_unreachable ();
18416 if ((countval & 0x08) && max_size > 8)
18419 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18422 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18423 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18427 if ((countval & 0x04) && max_size > 4)
18429 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18432 if ((countval & 0x02) && max_size > 2)
18434 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18437 if ((countval & 0x01) && max_size > 1)
18439 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18446 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18447 count, 1, OPTAB_DIRECT);
18448 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18449 count, QImode, 1, 4);
18453 /* When there are stringops, we can cheaply increase dest and src pointers.
18454 Otherwise we save code size by maintaining offset (zero is readily
18455 available from preceding rep operation) and using x86 addressing modes.
18457 if (TARGET_SINGLE_STRINGOP)
18461 rtx label = ix86_expand_aligntest (count, 4, true);
18462 src = change_address (srcmem, SImode, srcptr);
18463 dest = change_address (destmem, SImode, destptr);
18464 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18465 emit_label (label);
18466 LABEL_NUSES (label) = 1;
18470 rtx label = ix86_expand_aligntest (count, 2, true);
18471 src = change_address (srcmem, HImode, srcptr);
18472 dest = change_address (destmem, HImode, destptr);
18473 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18474 emit_label (label);
18475 LABEL_NUSES (label) = 1;
18479 rtx label = ix86_expand_aligntest (count, 1, true);
18480 src = change_address (srcmem, QImode, srcptr);
18481 dest = change_address (destmem, QImode, destptr);
18482 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18483 emit_label (label);
18484 LABEL_NUSES (label) = 1;
18489 rtx offset = force_reg (Pmode, const0_rtx);
18494 rtx label = ix86_expand_aligntest (count, 4, true);
18495 src = change_address (srcmem, SImode, srcptr);
18496 dest = change_address (destmem, SImode, destptr);
18497 emit_move_insn (dest, src);
18498 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18499 true, OPTAB_LIB_WIDEN);
18501 emit_move_insn (offset, tmp);
18502 emit_label (label);
18503 LABEL_NUSES (label) = 1;
18507 rtx label = ix86_expand_aligntest (count, 2, true);
18508 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18509 src = change_address (srcmem, HImode, tmp);
18510 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18511 dest = change_address (destmem, HImode, tmp);
18512 emit_move_insn (dest, src);
18513 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18514 true, OPTAB_LIB_WIDEN);
18516 emit_move_insn (offset, tmp);
18517 emit_label (label);
18518 LABEL_NUSES (label) = 1;
18522 rtx label = ix86_expand_aligntest (count, 1, true);
18523 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18524 src = change_address (srcmem, QImode, tmp);
18525 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18526 dest = change_address (destmem, QImode, tmp);
18527 emit_move_insn (dest, src);
18528 emit_label (label);
18529 LABEL_NUSES (label) = 1;
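/* Illustrative sketch (plain C): the constant-count path above copies the
   tail by testing the low bits of COUNT from the largest chunk down, so at
   most one move of each size is emitted.  */
static void
movmem_tail_sketch (unsigned char *dst, const unsigned char *src,
                    unsigned long count)
{
  unsigned long off = 0;
  int size;
  for (size = 8; size >= 1; size >>= 1)
    if (count & size)
      {
        int i;
        for (i = 0; i < size; i++)
          dst[off + i] = src[off + i];
        off += size;
      }
}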
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18536 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18537 rtx count, int max_size)
18540 expand_simple_binop (counter_mode (count), AND, count,
18541 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18542 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18543 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18549 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18553 if (CONST_INT_P (count))
18555 HOST_WIDE_INT countval = INTVAL (count);
18558 if ((countval & 0x10) && max_size > 16)
18562 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18563 emit_insn (gen_strset (destptr, dest, value));
18564 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18565 emit_insn (gen_strset (destptr, dest, value));
18568 gcc_unreachable ();
18571 if ((countval & 0x08) && max_size > 8)
18575 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18576 emit_insn (gen_strset (destptr, dest, value));
18580 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18581 emit_insn (gen_strset (destptr, dest, value));
18582 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18583 emit_insn (gen_strset (destptr, dest, value));
18587 if ((countval & 0x04) && max_size > 4)
18589 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18590 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18593 if ((countval & 0x02) && max_size > 2)
18595 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18596 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18599 if ((countval & 0x01) && max_size > 1)
18601 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18602 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18609 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18614 rtx label = ix86_expand_aligntest (count, 16, true);
18617 dest = change_address (destmem, DImode, destptr);
18618 emit_insn (gen_strset (destptr, dest, value));
18619 emit_insn (gen_strset (destptr, dest, value));
18623 dest = change_address (destmem, SImode, destptr);
18624 emit_insn (gen_strset (destptr, dest, value));
18625 emit_insn (gen_strset (destptr, dest, value));
18626 emit_insn (gen_strset (destptr, dest, value));
18627 emit_insn (gen_strset (destptr, dest, value));
18629 emit_label (label);
18630 LABEL_NUSES (label) = 1;
18634 rtx label = ix86_expand_aligntest (count, 8, true);
18637 dest = change_address (destmem, DImode, destptr);
18638 emit_insn (gen_strset (destptr, dest, value));
18642 dest = change_address (destmem, SImode, destptr);
18643 emit_insn (gen_strset (destptr, dest, value));
18644 emit_insn (gen_strset (destptr, dest, value));
18646 emit_label (label);
18647 LABEL_NUSES (label) = 1;
18651 rtx label = ix86_expand_aligntest (count, 4, true);
18652 dest = change_address (destmem, SImode, destptr);
18653 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18654 emit_label (label);
18655 LABEL_NUSES (label) = 1;
18659 rtx label = ix86_expand_aligntest (count, 2, true);
18660 dest = change_address (destmem, HImode, destptr);
18661 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18662 emit_label (label);
18663 LABEL_NUSES (label) = 1;
18667 rtx label = ix86_expand_aligntest (count, 1, true);
18668 dest = change_address (destmem, QImode, destptr);
18669 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18670 emit_label (label);
18671 LABEL_NUSES (label) = 1;
/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   by ALIGN, to DESIRED_ALIGNMENT.  */
18678 expand_movmem_prologue (rtx destmem, rtx srcmem,
18679 rtx destptr, rtx srcptr, rtx count,
18680 int align, int desired_alignment)
18682 if (align <= 1 && desired_alignment > 1)
18684 rtx label = ix86_expand_aligntest (destptr, 1, false);
18685 srcmem = change_address (srcmem, QImode, srcptr);
18686 destmem = change_address (destmem, QImode, destptr);
18687 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18688 ix86_adjust_counter (count, 1);
18689 emit_label (label);
18690 LABEL_NUSES (label) = 1;
18692 if (align <= 2 && desired_alignment > 2)
18694 rtx label = ix86_expand_aligntest (destptr, 2, false);
18695 srcmem = change_address (srcmem, HImode, srcptr);
18696 destmem = change_address (destmem, HImode, destptr);
18697 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18698 ix86_adjust_counter (count, 2);
18699 emit_label (label);
18700 LABEL_NUSES (label) = 1;
18702 if (align <= 4 && desired_alignment > 4)
18704 rtx label = ix86_expand_aligntest (destptr, 4, false);
18705 srcmem = change_address (srcmem, SImode, srcptr);
18706 destmem = change_address (destmem, SImode, destptr);
18707 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18708 ix86_adjust_counter (count, 4);
18709 emit_label (label);
18710 LABEL_NUSES (label) = 1;
18712 gcc_assert (desired_alignment <= 8);
/* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
18718 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18719 int desired_align, int align_bytes)
18722 rtx src_size, dst_size;
18724 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18725 if (src_align_bytes >= 0)
18726 src_align_bytes = desired_align - src_align_bytes;
18727 src_size = MEM_SIZE (src);
18728 dst_size = MEM_SIZE (dst);
18729 if (align_bytes & 1)
18731 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18732 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18734 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18736 if (align_bytes & 2)
18738 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18739 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18740 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18741 set_mem_align (dst, 2 * BITS_PER_UNIT);
18742 if (src_align_bytes >= 0
18743 && (src_align_bytes & 1) == (align_bytes & 1)
18744 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18745 set_mem_align (src, 2 * BITS_PER_UNIT);
18747 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18749 if (align_bytes & 4)
18751 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18752 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18753 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18754 set_mem_align (dst, 4 * BITS_PER_UNIT);
18755 if (src_align_bytes >= 0)
18757 unsigned int src_align = 0;
18758 if ((src_align_bytes & 3) == (align_bytes & 3))
18760 else if ((src_align_bytes & 1) == (align_bytes & 1))
18762 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18763 set_mem_align (src, src_align * BITS_PER_UNIT);
18766 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18768 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18769 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18770 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18771 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18772 if (src_align_bytes >= 0)
18774 unsigned int src_align = 0;
18775 if ((src_align_bytes & 7) == (align_bytes & 7))
18777 else if ((src_align_bytes & 3) == (align_bytes & 3))
18779 else if ((src_align_bytes & 1) == (align_bytes & 1))
18781 if (src_align > (unsigned int) desired_align)
18782 src_align = desired_align;
18783 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18784 set_mem_align (src, src_align * BITS_PER_UNIT);
18787 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
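/* Illustrative sketch (plain C): how the constant prologue above consumes
   ALIGN_BYTES, copying 1-, 2- and 4-byte pieces so that DST ends up
   DESIRED_ALIGN-aligned before the main loop.  Returns the number of bytes
   consumed.  */
static int
const_prologue_sketch (unsigned char *dst, const unsigned char *src,
                       int align_bytes)
{
  int off = 0, i, size;
  for (size = 1; size <= 4; size <<= 1)
    if (align_bytes & size)
      {
        for (i = 0; i < size; i++)
          dst[off + i] = src[off + i];
        off += size;
      }
  return off;
}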
/* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
18797 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18798 int align, int desired_alignment)
18800 if (align <= 1 && desired_alignment > 1)
18802 rtx label = ix86_expand_aligntest (destptr, 1, false);
18803 destmem = change_address (destmem, QImode, destptr);
18804 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18805 ix86_adjust_counter (count, 1);
18806 emit_label (label);
18807 LABEL_NUSES (label) = 1;
18809 if (align <= 2 && desired_alignment > 2)
18811 rtx label = ix86_expand_aligntest (destptr, 2, false);
18812 destmem = change_address (destmem, HImode, destptr);
18813 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18814 ix86_adjust_counter (count, 2);
18815 emit_label (label);
18816 LABEL_NUSES (label) = 1;
18818 if (align <= 4 && desired_alignment > 4)
18820 rtx label = ix86_expand_aligntest (destptr, 4, false);
18821 destmem = change_address (destmem, SImode, destptr);
18822 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18823 ix86_adjust_counter (count, 4);
18824 emit_label (label);
18825 LABEL_NUSES (label) = 1;
18827 gcc_assert (desired_alignment <= 8);
/* Store enough bytes at DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
18833 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18834 int desired_align, int align_bytes)
18837 rtx dst_size = MEM_SIZE (dst);
18838 if (align_bytes & 1)
18840 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18842 emit_insn (gen_strset (destreg, dst,
18843 gen_lowpart (QImode, value)));
18845 if (align_bytes & 2)
18847 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18848 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18849 set_mem_align (dst, 2 * BITS_PER_UNIT);
18851 emit_insn (gen_strset (destreg, dst,
18852 gen_lowpart (HImode, value)));
18854 if (align_bytes & 4)
18856 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18857 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18858 set_mem_align (dst, 4 * BITS_PER_UNIT);
18860 emit_insn (gen_strset (destreg, dst,
18861 gen_lowpart (SImode, value)));
18863 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18864 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18865 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18867 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18871 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18872 static enum stringop_alg
18873 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18874 int *dynamic_check)
18876 const struct stringop_algs * algs;
18877 bool optimize_for_speed;
18878 /* Algorithms using the rep prefix want at least edi and ecx;
18879 additionally, memset wants eax and memcpy wants esi. Don't
18880 consider such algorithms if the user has appropriated those
18881 registers for their own purposes. */
18882 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18884 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18886 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18887 || (alg != rep_prefix_1_byte \
18888 && alg != rep_prefix_4_byte \
18889 && alg != rep_prefix_8_byte))
18890 const struct processor_costs *cost;
18892 /* Even if the string operation call is cold, we still might spend a lot
18893 of time processing large blocks. */
18894 if (optimize_function_for_size_p (cfun)
18895 || (optimize_insn_for_size_p ()
18896 && expected_size != -1 && expected_size < 256))
18897 optimize_for_speed = false;
18899 optimize_for_speed = true;
18901 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18903 *dynamic_check = -1;
if (memset)
  algs = &cost->memset[TARGET_64BIT != 0];
else
  algs = &cost->memcpy[TARGET_64BIT != 0];
18908 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18909 return stringop_alg;
18910 /* rep; movq or rep; movl is the smallest variant. */
18911 else if (!optimize_for_speed)
18913 if (!count || (count & 3))
18914 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18916 return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; rep is expensive to set up.  */
18920 else if (expected_size != -1 && expected_size < 4)
18921 return loop_1_byte;
18922 else if (expected_size != -1)
18925 enum stringop_alg alg = libcall;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18928 /* We get here if the algorithms that were not libcall-based
18929 were rep-prefix based and we are unable to use rep prefixes
18930 based on global register usage. Break out of the loop and
18931 use the heuristic below. */
18932 if (algs->size[i].max == 0)
18934 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18936 enum stringop_alg candidate = algs->size[i].alg;
18938 if (candidate != libcall && ALG_USABLE_P (candidate))
18940 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18941 last non-libcall inline algorithm. */
18942 if (TARGET_INLINE_ALL_STRINGOPS)
18944 /* When the current size is best to be copied by a libcall,
18945 but we are still forced to inline, run the heuristic below
18946 that will pick code for medium sized blocks. */
18947 if (alg != libcall)
18951 else if (ALG_USABLE_P (candidate))
18955 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.
18962 If this turns out to be bad, we might simply specify the preferred
18963 choice in ix86_costs. */
18964 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18965 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18968 enum stringop_alg alg;
18970 bool any_alg_usable_p = true;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18974 enum stringop_alg candidate = algs->size[i].alg;
18975 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18977 if (candidate != libcall && candidate
18978 && ALG_USABLE_P (candidate))
18979 max = algs->size[i].max;
18981 /* If there aren't any usable algorithms, then recursing on
18982 smaller sizes isn't going to find anything. Just return the
18983 simple byte-at-a-time copy loop. */
18984 if (!any_alg_usable_p)
18986 /* Pick something reasonable. */
18987 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18988 *dynamic_check = 128;
18989 return loop_1_byte;
18993 alg = decide_alg (count, max / 2, memset, dynamic_check);
18994 gcc_assert (*dynamic_check == -1);
18995 gcc_assert (alg != libcall);
18996 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18997 *dynamic_check = max;
19000 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19001 #undef ALG_USABLE_P
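/* Illustrative sketch (hypothetical types, not the real stringop_algs
   layout): the size-table scan performed above.  Each entry pairs an upper
   size bound with an algorithm; max == -1 terminates the table and matches
   any size.  */
struct alg_entry_sketch { int max; int alg; };

static int
pick_alg_sketch (const struct alg_entry_sketch *table, int n,
                 int expected_size, int fallback_alg)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;
  return fallback_alg;
}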
19004 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19005 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19007 decide_alignment (int align,
19008 enum stringop_alg alg,
19011 int desired_align = 0;
19015 gcc_unreachable ();
19017 case unrolled_loop:
19018 desired_align = GET_MODE_SIZE (Pmode);
19020 case rep_prefix_8_byte:
19023 case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
19026 if (TARGET_PENTIUMPRO)
19031 case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
19034 if (TARGET_PENTIUMPRO)
19048 if (desired_align < align)
19049 desired_align = align;
19050 if (expected_size != -1 && expected_size < 4)
19051 desired_align = align;
19052 return desired_align;
19055 /* Return the smallest power of 2 greater than VAL. */
19057 smallest_pow2_greater_than (int val)
19065 /* Expand string move (memcpy) operation. Use i386 string operations when
19066 profitable. expand_setmem contains similar code. The code depends upon
architecture, block size and alignment, but always has the same
overall structure:
19070 1) Prologue guard: Conditional that jumps up to epilogues for small
blocks that can be handled by the epilogue alone.  This is faster, but
also needed for correctness, since the prologue assumes the block is
larger than the desired alignment.
19075 Optional dynamic check for size and libcall for large
19076 blocks is emitted here too, with -minline-stringops-dynamically.
19078 2) Prologue: copy first few bytes in order to get destination aligned
19079 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
We emit either a jump tree for power-of-two-sized blocks, or a byte loop.
19083 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19084 with specified algorithm.
19086 4) Epilogue: code copying tail of the block that is too small to be
19087 handled by main body (or up to size guarded by prologue guard). */
19090 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19091 rtx expected_align_exp, rtx expected_size_exp)
19097 rtx jump_around_label = NULL;
19098 HOST_WIDE_INT align = 1;
19099 unsigned HOST_WIDE_INT count = 0;
19100 HOST_WIDE_INT expected_size = -1;
19101 int size_needed = 0, epilogue_size_needed;
19102 int desired_align = 0, align_bytes = 0;
19103 enum stringop_alg alg;
19105 bool need_zero_guard = false;
19107 if (CONST_INT_P (align_exp))
19108 align = INTVAL (align_exp);
/* i386 can do misaligned accesses at reasonably increased cost.  */
19110 if (CONST_INT_P (expected_align_exp)
19111 && INTVAL (expected_align_exp) > align)
19112 align = INTVAL (expected_align_exp);
19113 /* ALIGN is the minimum of destination and source alignment, but we care here
19114 just about destination alignment. */
19115 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19116 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19118 if (CONST_INT_P (count_exp))
19119 count = expected_size = INTVAL (count_exp);
19120 if (CONST_INT_P (expected_size_exp) && count == 0)
19121 expected_size = INTVAL (expected_size_exp);
19123 /* Make sure we don't need to care about overflow later on. */
19124 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19127 /* Step 0: Decide on preferred algorithm, desired alignment and
19128 size of chunks to be copied by main loop. */
19130 alg = decide_alg (count, expected_size, false, &dynamic_check);
19131 desired_align = decide_alignment (align, alg, expected_size);
19133 if (!TARGET_ALIGN_STRINGOPS)
19134 align = desired_align;
19136 if (alg == libcall)
19138 gcc_assert (alg != no_stringop);
19140 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19141 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19142 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19147 gcc_unreachable ();
19149 need_zero_guard = true;
19150 size_needed = GET_MODE_SIZE (Pmode);
19152 case unrolled_loop:
19153 need_zero_guard = true;
19154 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19156 case rep_prefix_8_byte:
19159 case rep_prefix_4_byte:
19162 case rep_prefix_1_byte:
19166 need_zero_guard = true;
19171 epilogue_size_needed = size_needed;
19173 /* Step 1: Prologue guard. */
19175 /* Alignment code needs count to be in register. */
19176 if (CONST_INT_P (count_exp) && desired_align > align)
19178 if (INTVAL (count_exp) > desired_align
19179 && INTVAL (count_exp) > size_needed)
19182 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19183 if (align_bytes <= 0)
19186 align_bytes = desired_align - align_bytes;
19188 if (align_bytes == 0)
19189 count_exp = force_reg (counter_mode (count_exp), count_exp);
19191 gcc_assert (desired_align >= 1 && align >= 1);
19193 /* Ensure that alignment prologue won't copy past end of block. */
19194 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19196 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19197 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
19198 Make sure it is power of 2. */
19199 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19203 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything.  */
19207 if (size_needed == 1)
19208 desired_align = align;
19215 label = gen_label_rtx ();
19216 emit_cmp_and_jump_insns (count_exp,
19217 GEN_INT (epilogue_size_needed),
19218 LTU, 0, counter_mode (count_exp), 1, label);
19219 if (expected_size == -1 || expected_size < epilogue_size_needed)
19220 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19222 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
19228 if (dynamic_check != -1)
19230 if (CONST_INT_P (count_exp))
19232 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19234 emit_block_move_via_libcall (dst, src, count_exp, false);
19235 count_exp = const0_rtx;
19241 rtx hot_label = gen_label_rtx ();
19242 jump_around_label = gen_label_rtx ();
19243 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19244 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19245 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19246 emit_block_move_via_libcall (dst, src, count_exp, false);
19247 emit_jump (jump_around_label);
19248 emit_label (hot_label);
19252 /* Step 2: Alignment prologue. */
19254 if (desired_align > align)
19256 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know the
   constant offset in aliasing info.  It doesn't seem worth the pain
   to maintain it for the first move, so throw away the info early.  */
19262 src = change_address (src, BLKmode, srcreg);
19263 dst = change_address (dst, BLKmode, destreg);
19264 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19269 /* If we know how many bytes need to be stored before dst is
19270 sufficiently aligned, maintain aliasing info accurately. */
19271 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19272 desired_align, align_bytes);
19273 count_exp = plus_constant (count_exp, -align_bytes);
19274 count -= align_bytes;
19276 if (need_zero_guard
19277 && (count < (unsigned HOST_WIDE_INT) size_needed
19278 || (align_bytes == 0
19279 && count < ((unsigned HOST_WIDE_INT) size_needed
19280 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   execute.  */
19284 gcc_assert (size_needed > 1);
19285 if (label == NULL_RTX)
19286 label = gen_label_rtx ();
19287 emit_cmp_and_jump_insns (count_exp,
19288 GEN_INT (size_needed),
19289 LTU, 0, counter_mode (count_exp), 1, label);
19290 if (expected_size == -1
19291 || expected_size < (desired_align - align) / 2 + size_needed)
19292 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19294 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19297 if (label && size_needed == 1)
19299 emit_label (label);
19300 LABEL_NUSES (label) = 1;
19302 epilogue_size_needed = 1;
19304 else if (label == NULL_RTX)
19305 epilogue_size_needed = size_needed;
19307 /* Step 3: Main loop. */
19313 gcc_unreachable ();
19315 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19316 count_exp, QImode, 1, expected_size);
19319 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19320 count_exp, Pmode, 1, expected_size);
19322 case unrolled_loop:
19323 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
19324 registers for 4 temporaries anyway. */
19325 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19326 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19329 case rep_prefix_8_byte:
19330 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19333 case rep_prefix_4_byte:
19334 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19337 case rep_prefix_1_byte:
19338 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of src and dest memory for aliasing.  */
19343 if (CONST_INT_P (count_exp))
19345 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19346 (count / size_needed) * size_needed);
19347 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19348 (count / size_needed) * size_needed);
19352 src = change_address (src, BLKmode, srcreg);
19353 dst = change_address (dst, BLKmode, destreg);
19356 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
19365 if (size_needed < epilogue_size_needed)
19368 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19369 GEN_INT (size_needed - 1), count_exp, 1,
19371 if (tmp != count_exp)
19372 emit_move_insn (count_exp, tmp);
19374 emit_label (label);
19375 LABEL_NUSES (label) = 1;
19378 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19379 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19380 epilogue_size_needed);
19381 if (jump_around_label)
19382 emit_label (jump_around_label);
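/* Illustrative sketch (plain C, names hypothetical): the
   guard/prologue/body/epilogue structure the expander above emits as RTL.
   CHUNK stands for SIZE_NEEDED and is assumed to be a power of two.  */
static void
movmem_structure_sketch (unsigned char *dst, const unsigned char *src,
                         unsigned long count, unsigned long chunk)
{
  unsigned long i;
  if (count >= chunk)                   /* 1) prologue guard */
    {
      /* 2) prologue: byte copies until DST is aligned would go here,
         decrementing COUNT accordingly.  */
      while (count >= chunk)            /* 3) main body */
        {
          for (i = 0; i < chunk; i++)
            dst[i] = src[i];
          dst += chunk, src += chunk, count -= chunk;
        }
    }
  while (count--)                       /* 4) epilogue: remaining tail */
    *dst++ = *src++;
}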
/* Helper function for memset.  For a QImode value 0xXY, produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than synth_mult
   by unwinding the sequence by hand on CPUs with slow multiply.  */
19392 promote_duplicated_reg (enum machine_mode mode, rtx val)
19394 enum machine_mode valmode = GET_MODE (val);
19396 int nops = mode == DImode ? 3 : 2;
19398 gcc_assert (mode == SImode || mode == DImode);
19399 if (val == const0_rtx)
19400 return copy_to_mode_reg (mode, const0_rtx);
19401 if (CONST_INT_P (val))
HOST_WIDE_INT v = INTVAL (val) & 255;

v |= v << 8;
v |= v << 16;
19407 if (mode == DImode)
19408 v |= (v << 16) << 16;
19409 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19412 if (valmode == VOIDmode)
19414 if (valmode != QImode)
19415 val = gen_lowpart (QImode, val);
19416 if (mode == QImode)
19418 if (!TARGET_PARTIAL_REG_STALL)
19420 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19421 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19422 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19423 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19425 rtx reg = convert_modes (mode, QImode, val, true);
19426 tmp = promote_duplicated_reg (mode, const1_rtx);
19427 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19432 rtx reg = convert_modes (mode, QImode, val, true);
19434 if (!TARGET_PARTIAL_REG_STALL)
19435 if (mode == SImode)
19436 emit_insn (gen_movsi_insv_1 (reg, reg));
19438 emit_insn (gen_movdi_insv_1 (reg, reg));
19441 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19442 NULL, 1, OPTAB_DIRECT);
19444 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19446 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19447 NULL, 1, OPTAB_DIRECT);
19448 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19449 if (mode == SImode)
19451 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19452 NULL, 1, OPTAB_DIRECT);
19453 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
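/* Illustrative sketch (plain C): the replication computed above, both for
   the CONST_INT case folded at compile time and for the shift/or sequence
   used on register values.  */
static unsigned int
replicate_byte_sketch (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;          /* 0x0000XYXY */
  v |= v << 16;         /* 0xXYXYXYXY */
  return v;
}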
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
19462 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19467 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19468 promoted_val = promote_duplicated_reg (DImode, val);
19469 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19470 promoted_val = promote_duplicated_reg (SImode, val);
19471 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19472 promoted_val = promote_duplicated_reg (HImode, val);
19474 promoted_val = val;
19476 return promoted_val;
19479 /* Expand string clear operation (bzero). Use i386 string operations when
19480 profitable. See expand_movmem comment for explanation of individual
19481 steps performed. */
19483 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19484 rtx expected_align_exp, rtx expected_size_exp)
19489 rtx jump_around_label = NULL;
19490 HOST_WIDE_INT align = 1;
19491 unsigned HOST_WIDE_INT count = 0;
19492 HOST_WIDE_INT expected_size = -1;
19493 int size_needed = 0, epilogue_size_needed;
19494 int desired_align = 0, align_bytes = 0;
19495 enum stringop_alg alg;
19496 rtx promoted_val = NULL;
19497 bool force_loopy_epilogue = false;
19499 bool need_zero_guard = false;
19501 if (CONST_INT_P (align_exp))
19502 align = INTVAL (align_exp);
/* i386 can do misaligned accesses at reasonably increased cost.  */
19504 if (CONST_INT_P (expected_align_exp)
19505 && INTVAL (expected_align_exp) > align)
19506 align = INTVAL (expected_align_exp);
19507 if (CONST_INT_P (count_exp))
19508 count = expected_size = INTVAL (count_exp);
19509 if (CONST_INT_P (expected_size_exp) && count == 0)
19510 expected_size = INTVAL (expected_size_exp);
19512 /* Make sure we don't need to care about overflow later on. */
19513 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19516 /* Step 0: Decide on preferred algorithm, desired alignment and
19517 size of chunks to be copied by main loop. */
19519 alg = decide_alg (count, expected_size, true, &dynamic_check);
19520 desired_align = decide_alignment (align, alg, expected_size);
19522 if (!TARGET_ALIGN_STRINGOPS)
19523 align = desired_align;
19525 if (alg == libcall)
19527 gcc_assert (alg != no_stringop);
19529 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19530 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19535 gcc_unreachable ();
19537 need_zero_guard = true;
19538 size_needed = GET_MODE_SIZE (Pmode);
19540 case unrolled_loop:
19541 need_zero_guard = true;
19542 size_needed = GET_MODE_SIZE (Pmode) * 4;
19544 case rep_prefix_8_byte:
19547 case rep_prefix_4_byte:
19550 case rep_prefix_1_byte:
19554 need_zero_guard = true;
19558 epilogue_size_needed = size_needed;
19560 /* Step 1: Prologue guard. */
19562 /* Alignment code needs count to be in register. */
19563 if (CONST_INT_P (count_exp) && desired_align > align)
19565 if (INTVAL (count_exp) > desired_align
19566 && INTVAL (count_exp) > size_needed)
19569 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19570 if (align_bytes <= 0)
19573 align_bytes = desired_align - align_bytes;
19575 if (align_bytes == 0)
19577 enum machine_mode mode = SImode;
if (TARGET_64BIT && (count & ~0xffffffff))
  mode = DImode;
count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e. one load of the big constant
   in front of all the code).  */
19586 if (CONST_INT_P (val_exp))
19587 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19588 desired_align, align);
19589 /* Ensure that alignment prologue won't copy past end of block. */
19590 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19592 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19593 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19594 Make sure it is power of 2. */
19595 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL-promoting
   code.  This means that if the promoted VAL is not constant, we might not
   use it in the epilogue and have to use a byte loop instead.  */
19601 if (epilogue_size_needed > 2 && !promoted_val)
19602 force_loopy_epilogue = true;
19605 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything.  */
19609 if (size_needed == 1)
19610 desired_align = align;
19617 label = gen_label_rtx ();
19618 emit_cmp_and_jump_insns (count_exp,
19619 GEN_INT (epilogue_size_needed),
19620 LTU, 0, counter_mode (count_exp), 1, label);
19621 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19622 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19624 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19627 if (dynamic_check != -1)
19629 rtx hot_label = gen_label_rtx ();
19630 jump_around_label = gen_label_rtx ();
19631 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19632 LEU, 0, counter_mode (count_exp), 1, hot_label);
19633 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19634 set_storage_via_libcall (dst, count_exp, val_exp, false);
19635 emit_jump (jump_around_label);
19636 emit_label (hot_label);
19639 /* Step 2: Alignment prologue. */
19641 /* Do the expensive promotion once we branched off the small blocks. */
19643 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19644 desired_align, align);
19645 gcc_assert (desired_align >= 1 && align >= 1);
19647 if (desired_align > align)
19649 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know the
   constant offset in aliasing info.  It doesn't seem worth the pain
   to maintain it for the first move, so throw away the info early.  */
19655 dst = change_address (dst, BLKmode, destreg);
19656 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19661 /* If we know how many bytes need to be stored before dst is
19662 sufficiently aligned, maintain aliasing info accurately. */
19663 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19664 desired_align, align_bytes);
19665 count_exp = plus_constant (count_exp, -align_bytes);
19666 count -= align_bytes;
19668 if (need_zero_guard
19669 && (count < (unsigned HOST_WIDE_INT) size_needed
19670 || (align_bytes == 0
19671 && count < ((unsigned HOST_WIDE_INT) size_needed
19672 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   execute.  */
19676 gcc_assert (size_needed > 1);
19677 if (label == NULL_RTX)
19678 label = gen_label_rtx ();
19679 emit_cmp_and_jump_insns (count_exp,
19680 GEN_INT (size_needed),
19681 LTU, 0, counter_mode (count_exp), 1, label);
19682 if (expected_size == -1
19683 || expected_size < (desired_align - align) / 2 + size_needed)
19684 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19686 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19689 if (label && size_needed == 1)
19691 emit_label (label);
19692 LABEL_NUSES (label) = 1;
19694 promoted_val = val_exp;
19695 epilogue_size_needed = 1;
19697 else if (label == NULL_RTX)
19698 epilogue_size_needed = size_needed;
19700 /* Step 3: Main loop. */
19706 gcc_unreachable ();
19708 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19709 count_exp, QImode, 1, expected_size);
19712 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19713 count_exp, Pmode, 1, expected_size);
19715 case unrolled_loop:
19716 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19717 count_exp, Pmode, 4, expected_size);
19719 case rep_prefix_8_byte:
19720 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19723 case rep_prefix_4_byte:
19724 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19727 case rep_prefix_1_byte:
19728 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
/* Properly adjust the offset of dest memory for aliasing.  */
19733 if (CONST_INT_P (count_exp))
19734 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19735 (count / size_needed) * size_needed);
19737 dst = change_address (dst, BLKmode, destreg);
19739 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
19748 if (size_needed < epilogue_size_needed)
19751 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19752 GEN_INT (size_needed - 1), count_exp, 1,
19754 if (tmp != count_exp)
19755 emit_move_insn (count_exp, tmp);
19757 emit_label (label);
19758 LABEL_NUSES (label) = 1;
19761 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19763 if (force_loopy_epilogue)
19764 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19765 epilogue_size_needed);
19767 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19768 epilogue_size_needed);
19770 if (jump_around_label)
19771 emit_label (jump_around_label);
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb
19778 out = result, initialized with the start address
19779 align_rtx = alignment of the address.
scratch = scratch register, initialized with the start address when
          not aligned, otherwise undefined
19783 This is just the body. It needs the initializations mentioned above and
19784 some address computing at the end. These things are done in i386.md. */
19787 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19791 rtx align_2_label = NULL_RTX;
19792 rtx align_3_label = NULL_RTX;
19793 rtx align_4_label = gen_label_rtx ();
19794 rtx end_0_label = gen_label_rtx ();
19796 rtx tmpreg = gen_reg_rtx (SImode);
19797 rtx scratch = gen_reg_rtx (SImode);
19801 if (CONST_INT_P (align_rtx))
19802 align = INTVAL (align_rtx);
19804 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19806 /* Is there a known alignment and is it less than 4? */
19809 rtx scratch1 = gen_reg_rtx (Pmode);
19810 emit_move_insn (scratch1, out);
19811 /* Is there a known alignment and is it not 2? */
19814 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19815 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19817 /* Leave just the 3 lower bits. */
19818 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19819 NULL_RTX, 0, OPTAB_WIDEN);
19821 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19822 Pmode, 1, align_4_label);
19823 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19824 Pmode, 1, align_2_label);
19825 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19826 Pmode, 1, align_3_label);
/* Since the alignment is 2, we have to check 2 or 0 bytes;
   check whether it is aligned to a 4-byte boundary.  */
19833 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19834 NULL_RTX, 0, OPTAB_WIDEN);
19836 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19837 Pmode, 1, align_4_label);
19840 mem = change_address (src, QImode, out);
19842 /* Now compare the bytes. */
/* Compare the first n unaligned bytes on a byte-by-byte basis.  */
19845 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19846 QImode, 1, end_0_label);
19848 /* Increment the address. */
19849 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19851 /* Not needed with an alignment of 2 */
19854 emit_label (align_2_label);
19856 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19859 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19861 emit_label (align_3_label);
19864 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19867 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
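/* The sequence above is the RTL equivalent of this scalar prologue
   (a sketch under the same assumptions, not GCC code):

     while (((uintptr_t) p & 3) != 0)
       {
         if (*p == '\0')
           return p;        // terminator found in the head bytes
         p++;               // at most 3 iterations before alignment
       }
 */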
19870 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
19871 align this loop; it only makes the program bigger and does not help to
19872 speed it up. */
19873 emit_label (align_4_label);
19875 mem = change_address (src, SImode, out);
19876 emit_move_insn (scratch, mem);
19877 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
19879 /* This formula yields a nonzero result iff one of the bytes is zero.
19880 This saves three branches inside the loop and many cycles. */
19882 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19883 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19884 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19885 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19886 gen_int_mode (0x80808080, SImode)));
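/* The four insns just emitted compute the classic "word has a zero
   byte" test; as a self-contained C sketch (hypothetical helper, not
   part of GCC):

     static int
     word_has_zero_byte (unsigned int w)
     {
       // nonzero iff some byte of w is 0x00
       return ((w - 0x01010101U) & ~w & 0x80808080U) != 0;
     }

   Subtracting 0x01 from each byte borrows into bit 7 exactly when the
   byte was zero; masking with ~w rejects bytes that already had bit 7
   set, so no false positives remain.  */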
19887 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19892 rtx reg = gen_reg_rtx (SImode);
19893 rtx reg2 = gen_reg_rtx (Pmode);
19894 emit_move_insn (reg, tmpreg);
19895 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19897 /* If zero is not in the first two bytes, move two bytes forward. */
19898 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19899 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19900 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19901 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19902 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19905 /* Emit lea manually to avoid clobbering of flags. */
19906 emit_insn (gen_rtx_SET (SImode, reg2,
19907 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19909 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19910 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19911 emit_insn (gen_rtx_SET (VOIDmode, out,
19912 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19918 rtx end_2_label = gen_label_rtx ();
19919 /* Is zero in the first two bytes? */
19921 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19922 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19923 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19924 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19925 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19927 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19928 JUMP_LABEL (tmp) = end_2_label;
19930 /* Not in the first two. Move two bytes forward. */
19931 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19932 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
19934 emit_label (end_2_label);
19938 /* Avoid branch in fixing the byte. */
19939 tmpreg = gen_lowpart (QImode, tmpreg);
19940 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19941 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19942 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19943 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
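/* Scalar sketch of the branchless fixup above (illustrative only):
   after the loop OUT = word_start + 4 and TMPREG holds 0x80 at the
   position of the first zero byte.

     if (!(mask & 0x8080)) { mask >>= 16; out += 2; }  // the cmov pair
     carry = (mask >> 7) & 1;   // addb %b,%b copies bit 7 into CF
     out   = out - 3 - carry;   // sbb lands exactly on the NUL

   E.g. a zero in byte 2: out += 2 and carry = 1, so the result is
   word_start + 6 - 3 - 1 = word_start + 2.  */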
19945 emit_label (end_0_label);
19948 /* Expand strlen. */
19951 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19953 rtx addr, scratch1, scratch2, scratch3, scratch4;
19955 /* The generic case of the strlen expander is long. Avoid
19956 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
19958 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19959 && !TARGET_INLINE_ALL_STRINGOPS
19960 && !optimize_insn_for_size_p ()
19961 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19962 return 0;
19964 addr = force_reg (Pmode, XEXP (src, 0));
19965 scratch1 = gen_reg_rtx (Pmode);
19967 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19968 && !optimize_insn_for_size_p ())
19970 /* Well, it seems that some optimizer does not combine a call like
19971 foo(strlen(bar), strlen(bar));
19972 when the move and the subtraction are done here. It does calculate
19973 the length just once when these instructions are done inside
19974 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
19975 and this way uses one fewer register for the lifetime of
19976 output_strlen_unroll(), this is better. */
19978 emit_move_insn (out, addr);
19980 ix86_expand_strlensi_unroll_1 (out, src, align);
19982 /* strlensi_unroll_1 returns the address of the zero at the end of
19983 the string, like memchr(), so compute the length by subtracting
19984 the start address. */
19985 emit_insn (ix86_gen_sub3 (out, out, addr));
19991 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19992 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19995 scratch2 = gen_reg_rtx (Pmode);
19996 scratch3 = gen_reg_rtx (Pmode);
19997 scratch4 = force_reg (Pmode, constm1_rtx);
19999 emit_move_insn (scratch3, addr);
20000 eoschar = force_reg (QImode, eoschar);
20002 src = replace_equiv_address_nv (src, scratch3);
20004 /* If .md starts supporting :P, this can be done in .md. */
20005 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20006 scratch4), UNSPEC_SCAS);
20007 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20008 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20009 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
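/* Length recovery from the repnz scasb count (a sketch of the
   arithmetic, not additional GCC code): the count register starts at
   -1 and is decremented once per byte scanned, including the
   terminating NUL, so afterwards

     count = -(len + 2)
     len   = ~count - 1      // the one_cmpl + add -1 emitted above

   e.g. for "ab": 3 bytes scanned, count = -4, ~(-4) = 3, len = 2.  */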
20014 /* For a given symbol (function), construct code to compute the address of
20015 its PLT entry in the large x86-64 PIC model. */
20017 construct_plt_address (rtx symbol)
20019 rtx tmp = gen_reg_rtx (Pmode);
20020 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20022 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20023 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20025 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20026 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20031 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20033 rtx callarg2, rtx pop, int sibcall)
20035 rtx use = NULL, call;
20037 if (pop == const0_rtx)
20038 pop = NULL;
20039 gcc_assert (!TARGET_64BIT || !pop);
20041 if (TARGET_MACHO && !TARGET_64BIT)
20044 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20045 fnaddr = machopic_indirect_call_target (fnaddr);
20050 /* Static functions and indirect calls don't need the pic register. */
20051 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20052 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20053 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20054 use_reg (&use, pic_offset_table_rtx);
20057 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20059 rtx al = gen_rtx_REG (QImode, AX_REG);
20060 emit_move_insn (al, callarg2);
20061 use_reg (&use, al);
20064 if (ix86_cmodel == CM_LARGE_PIC
20066 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20067 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20068 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20069 else if (sibcall
20070 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20071 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20073 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20074 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20077 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20078 if (retval)
20079 call = gen_rtx_SET (VOIDmode, retval, call);
20081 if (pop)
20082 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20083 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20084 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20086 if (TARGET_64BIT
20087 && ix86_cfun_abi () == MS_ABI
20088 && (!callarg2 || INTVAL (callarg2) != -2))
20090 /* We need to represent that SI and DI registers are clobbered
20091 by the function call. */
20092 static int clobbered_registers[] = {
20093 XMM6_REG, XMM7_REG, XMM8_REG,
20094 XMM9_REG, XMM10_REG, XMM11_REG,
20095 XMM12_REG, XMM13_REG, XMM14_REG,
20096 XMM15_REG, SI_REG, DI_REG
20099 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20100 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20101 UNSPEC_MS_TO_SYSV_CALL);
20105 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20106 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20109 (SSE_REGNO_P (clobbered_registers[i])
20111 clobbered_registers[i]));
20113 call = gen_rtx_PARALLEL (VOIDmode,
20114 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20118 call = emit_call_insn (call);
20120 CALL_INSN_FUNCTION_USAGE (call) = use;
20124 /* Clear stack slot assignments remembered from previous functions.
20125 This is called from INIT_EXPANDERS once before RTL is emitted for each
20126 function body. */
20128 static struct machine_function *
20129 ix86_init_machine_status (void)
20131 struct machine_function *f;
20133 f = ggc_alloc_cleared_machine_function ();
20134 f->use_fast_prologue_epilogue_nregs = -1;
20135 f->tls_descriptor_call_expanded_p = 0;
20136 f->call_abi = ix86_abi;
20141 /* Return a MEM corresponding to a stack slot with mode MODE.
20142 Allocate a new slot if necessary.
20144 The RTL for a function can have several slots available: N is
20145 which slot to use. */
20148 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20150 struct stack_local_entry *s;
20152 gcc_assert (n < MAX_386_STACK_LOCALS);
20154 /* Virtual slot is valid only before vregs are instantiated. */
20155 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20157 for (s = ix86_stack_locals; s; s = s->next)
20158 if (s->mode == mode && s->n == n)
20159 return copy_rtx (s->rtl);
20161 s = ggc_alloc_stack_local_entry ();
20162 s->n = n;
20163 s->mode = mode;
20164 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20166 s->next = ix86_stack_locals;
20167 ix86_stack_locals = s;
20171 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20173 static GTY(()) rtx ix86_tls_symbol;
20175 ix86_tls_get_addr (void)
20178 if (!ix86_tls_symbol)
20180 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20181 (TARGET_ANY_GNU_TLS
20182 && !TARGET_64BIT)
20183 ? "___tls_get_addr"
20184 : "__tls_get_addr");
20187 return ix86_tls_symbol;
20190 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20192 static GTY(()) rtx ix86_tls_module_base_symbol;
20194 ix86_tls_module_base (void)
20197 if (!ix86_tls_module_base_symbol)
20199 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20200 "_TLS_MODULE_BASE_");
20201 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20202 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20205 return ix86_tls_module_base_symbol;
20208 /* Calculate the length of the memory address in the instruction
20209 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20212 memory_address_length (rtx addr)
20214 struct ix86_address parts;
20215 rtx base, index, disp;
20219 if (GET_CODE (addr) == PRE_DEC
20220 || GET_CODE (addr) == POST_INC
20221 || GET_CODE (addr) == PRE_MODIFY
20222 || GET_CODE (addr) == POST_MODIFY)
20225 ok = ix86_decompose_address (addr, &parts);
20226 gcc_assert (ok);
20228 if (parts.base && GET_CODE (parts.base) == SUBREG)
20229 parts.base = SUBREG_REG (parts.base);
20230 if (parts.index && GET_CODE (parts.index) == SUBREG)
20231 parts.index = SUBREG_REG (parts.index);
20234 index = parts.index;
20238 /* Rule of thumb:
20239 - esp as the base always wants an index,
20240 - ebp as the base always wants a displacement,
20241 - r12 as the base always wants an index,
20242 - r13 as the base always wants a displacement. */
20244 /* Register Indirect. */
20245 if (base && !index && !disp)
20247 /* esp (for its index) and ebp (for its displacement) need
20248 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20249 mode. */
20250 if (REG_P (addr)
20251 && (addr == arg_pointer_rtx
20252 || addr == frame_pointer_rtx
20253 || REGNO (addr) == SP_REG
20254 || REGNO (addr) == BP_REG
20255 || REGNO (addr) == R12_REG
20256 || REGNO (addr) == R13_REG))
20257 len = 1;
20260 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20261 is not disp32, but disp32(%rip), so for disp32
20262 SIB byte is needed, unless print_operand_address
20263 optimizes it into disp32(%rip) or (%rip) is implied
20264 by UNSPEC. */
20265 else if (disp && !base && !index)
20272 if (GET_CODE (disp) == CONST)
20273 symbol = XEXP (disp, 0);
20274 if (GET_CODE (symbol) == PLUS
20275 && CONST_INT_P (XEXP (symbol, 1)))
20276 symbol = XEXP (symbol, 0);
20278 if (GET_CODE (symbol) != LABEL_REF
20279 && (GET_CODE (symbol) != SYMBOL_REF
20280 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20281 && (GET_CODE (symbol) != UNSPEC
20282 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20283 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20290 /* Find the length of the displacement constant. */
20291 if (disp)
20292 {
20293 if (base && satisfies_constraint_K (disp))
20294 len = 1;
20295 else
20296 len = 4;
20297 }
20298 /* ebp always wants a displacement. Similarly r13. */
20299 else if (base && REG_P (base)
20300 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20301 len = 1;
20303 /* An index requires the two-byte modrm form.... */
20304 if (index
20305 /* ...like esp (or r12), which always wants an index. */
20306 || base == arg_pointer_rtx
20307 || base == frame_pointer_rtx
20308 || (base && REG_P (base)
20309 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20310 len += 1;
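/* Worked examples of the rules above (byte counts exclude the modrm
   byte itself; shown for illustration, not exhaustive):

     (%eax)         -> 0   plain register indirect
     (%esp)         -> 1   SIB byte forced by the esp rule
     4(%ebp)        -> 1   disp8 forced by the ebp rule
     foo(,%eax,4)   -> 5   disp32 + SIB
     foo            -> 4   disp32 (plus a SIB byte in the non-RIP
                           64-bit case, per the comment above)  */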
20326 /* Compute the default value for the "length_immediate" attribute. When
20327 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
20329 ix86_attr_length_immediate_default (rtx insn, int shortform)
20333 extract_insn_cached (insn);
20334 for (i = recog_data.n_operands - 1; i >= 0; --i)
20335 if (CONSTANT_P (recog_data.operand[i]))
20337 enum attr_mode mode = get_attr_mode (insn);
20340 if (shortform && CONST_INT_P (recog_data.operand[i]))
20342 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20349 ival = trunc_int_for_mode (ival, HImode);
20352 ival = trunc_int_for_mode (ival, SImode);
20357 if (IN_RANGE (ival, -128, 127))
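/* Example of the short form being tested here: "addl $5, %ebx" can be
   encoded as 83 c3 05 (one immediate byte) instead of
   81 c3 05 00 00 00 (four immediate bytes), because 5 fits in a
   signed 8-bit immediate.  */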
20374 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
20379 fatal_insn ("unknown insn mode", insn);
20384 /* Compute default value for "length_address" attribute. */
20386 ix86_attr_length_address_default (rtx insn)
20390 if (get_attr_type (insn) == TYPE_LEA)
20392 rtx set = PATTERN (insn), addr;
20394 if (GET_CODE (set) == PARALLEL)
20395 set = XVECEXP (set, 0, 0);
20397 gcc_assert (GET_CODE (set) == SET);
20399 addr = SET_SRC (set);
20400 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20402 if (GET_CODE (addr) == ZERO_EXTEND)
20403 addr = XEXP (addr, 0);
20404 if (GET_CODE (addr) == SUBREG)
20405 addr = SUBREG_REG (addr);
20408 return memory_address_length (addr);
20411 extract_insn_cached (insn);
20412 for (i = recog_data.n_operands - 1; i >= 0; --i)
20413 if (MEM_P (recog_data.operand[i]))
20415 constrain_operands_cached (reload_completed);
20416 if (which_alternative != -1)
20418 const char *constraints = recog_data.constraints[i];
20419 int alt = which_alternative;
20421 while (*constraints == '=' || *constraints == '+')
20424 while (*constraints++ != ',')
20426 /* Skip ignored operands. */
20427 if (*constraints == 'X')
20430 return memory_address_length (XEXP (recog_data.operand[i], 0));
20435 /* Compute the default value for the "length_vex" attribute. It includes
20436 the 2- or 3-byte VEX prefix and 1 opcode byte. */
20439 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20444 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
20445 requires the 3-byte VEX prefix. */
20446 if (!has_0f_opcode || has_vex_w)
20449 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
20453 extract_insn_cached (insn);
20455 for (i = recog_data.n_operands - 1; i >= 0; --i)
20456 if (REG_P (recog_data.operand[i]))
20458 /* REX.W bit uses 3 byte VEX prefix. */
20459 if (GET_MODE (recog_data.operand[i]) == DImode
20460 && GENERAL_REG_P (recog_data.operand[i]))
20465 /* REX.X or REX.B bits use 3 byte VEX prefix. */
20466 if (MEM_P (recog_data.operand[i])
20467 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
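/* Summary of the decision implemented above (a sketch; C5 xx is the
   2-byte VEX prefix, C4 xx xx the 3-byte one):

     opcode map other than 0F              -> 3-byte prefix
     VEX.W set                             -> 3-byte prefix
     32-bit mode, otherwise                -> 2-byte prefix suffices
     64-bit: DImode general register       -> needs REX.W -> 3-byte
     64-bit: extended reg (r8-r15/xmm8-15)
             in a memory address (REX.X/B) -> 3-byte  */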
20474 /* Return the maximum number of instructions a cpu can issue. */
20477 ix86_issue_rate (void)
20481 case PROCESSOR_PENTIUM:
20482 case PROCESSOR_ATOM:
20486 case PROCESSOR_PENTIUMPRO:
20487 case PROCESSOR_PENTIUM4:
20488 case PROCESSOR_ATHLON:
20490 case PROCESSOR_AMDFAM10:
20491 case PROCESSOR_NOCONA:
20492 case PROCESSOR_GENERIC32:
20493 case PROCESSOR_GENERIC64:
20494 case PROCESSOR_BDVER1:
20497 case PROCESSOR_CORE2:
20505 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
20506 by DEP_INSN and reads nothing else that DEP_INSN sets. */
20509 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20513 /* Simplify the test for uninteresting insns. */
20514 if (insn_type != TYPE_SETCC
20515 && insn_type != TYPE_ICMOV
20516 && insn_type != TYPE_FCMOV
20517 && insn_type != TYPE_IBR)
20520 if ((set = single_set (dep_insn)) != 0)
20522 set = SET_DEST (set);
20525 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20526 && XVECLEN (PATTERN (dep_insn), 0) == 2
20527 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20528 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20530 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20531 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20536 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20539 /* This test is true if the dependent insn reads the flags but
20540 not any other potentially set register. */
20541 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20544 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20550 /* Return true iff USE_INSN has a memory address with operands set by
20551 SET_INSN. */
20554 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20557 extract_insn_cached (use_insn);
20558 for (i = recog_data.n_operands - 1; i >= 0; --i)
20559 if (MEM_P (recog_data.operand[i]))
20561 rtx addr = XEXP (recog_data.operand[i], 0);
20562 return modified_in_p (addr, set_insn) != 0;
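/* Classic address-generation interlock that this predicate detects
   (illustrative asm, not something emitted here):

     addl %ebx, %eax        ; SET_INSN writes %eax
     movl (%eax), %ecx      ; USE_INSN needs %eax to form its address

   On the original Pentium the address is computed a pipeline stage
   early, so ix86_adjust_cost charges such pairs an extra cycle.  */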
20568 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20570 enum attr_type insn_type, dep_insn_type;
20571 enum attr_memory memory;
20573 int dep_insn_code_number;
20575 /* Anti and output dependencies have zero cost on all CPUs. */
20576 if (REG_NOTE_KIND (link) != 0)
20579 dep_insn_code_number = recog_memoized (dep_insn);
20581 /* If we can't recognize the insns, we can't really do anything. */
20582 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20585 insn_type = get_attr_type (insn);
20586 dep_insn_type = get_attr_type (dep_insn);
20590 case PROCESSOR_PENTIUM:
20591 /* Address Generation Interlock adds a cycle of latency. */
20592 if (insn_type == TYPE_LEA)
20594 rtx addr = PATTERN (insn);
20596 if (GET_CODE (addr) == PARALLEL)
20597 addr = XVECEXP (addr, 0, 0);
20599 gcc_assert (GET_CODE (addr) == SET);
20601 addr = SET_SRC (addr);
20602 if (modified_in_p (addr, dep_insn))
20605 else if (ix86_agi_dependent (dep_insn, insn))
20608 /* ??? Compares pair with jump/setcc. */
20609 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20612 /* Floating point stores require value to be ready one cycle earlier. */
20613 if (insn_type == TYPE_FMOV
20614 && get_attr_memory (insn) == MEMORY_STORE
20615 && !ix86_agi_dependent (dep_insn, insn))
20619 case PROCESSOR_PENTIUMPRO:
20620 memory = get_attr_memory (insn);
20622 /* INT->FP conversion is expensive. */
20623 if (get_attr_fp_int_src (dep_insn))
20626 /* There is one cycle extra latency between an FP op and a store. */
20627 if (insn_type == TYPE_FMOV
20628 && (set = single_set (dep_insn)) != NULL_RTX
20629 && (set2 = single_set (insn)) != NULL_RTX
20630 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20631 && MEM_P (SET_DEST (set2)))
20634 /* Show the ability of the reorder buffer to hide the latency of a load
20635 by executing it in parallel with the previous instruction, in case the
20636 previous instruction is not needed to compute the address. */
20637 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20638 && !ix86_agi_dependent (dep_insn, insn))
20640 /* Claim moves to take one cycle, as the core can issue one load
20641 at a time and the next load can start a cycle later. */
20642 if (dep_insn_type == TYPE_IMOV
20643 || dep_insn_type == TYPE_FMOV)
20651 memory = get_attr_memory (insn);
20653 /* The esp dependency is resolved before the instruction is really
20654 finished. */
20655 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20656 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20659 /* INT->FP conversion is expensive. */
20660 if (get_attr_fp_int_src (dep_insn))
20663 /* Show the ability of the reorder buffer to hide the latency of a load
20664 by executing it in parallel with the previous instruction, in case the
20665 previous instruction is not needed to compute the address. */
20666 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20667 && !ix86_agi_dependent (dep_insn, insn))
20669 /* Claim moves to take one cycle, as the core can issue one load
20670 at a time and the next load can start a cycle later. */
20671 if (dep_insn_type == TYPE_IMOV
20672 || dep_insn_type == TYPE_FMOV)
20681 case PROCESSOR_ATHLON:
20683 case PROCESSOR_AMDFAM10:
20684 case PROCESSOR_BDVER1:
20685 case PROCESSOR_ATOM:
20686 case PROCESSOR_GENERIC32:
20687 case PROCESSOR_GENERIC64:
20688 memory = get_attr_memory (insn);
20690 /* Show the ability of the reorder buffer to hide the latency of a load
20691 by executing it in parallel with the previous instruction, in case the
20692 previous instruction is not needed to compute the address. */
20693 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20694 && !ix86_agi_dependent (dep_insn, insn))
20696 enum attr_unit unit = get_attr_unit (insn);
20699 /* Because of the difference between the length of integer and
20700 floating unit pipeline preparation stages, the memory operands
20701 for floating point are cheaper.
20703 ??? For Athlon the difference is most probably 2. */
20704 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20707 loadcost = TARGET_ATHLON ? 2 : 0;
20709 if (cost >= loadcost)
20722 /* How many alternative schedules to try. This should be as wide as the
20723 scheduling freedom in the DFA, but no wider. Making this value too
20724 large results in extra work for the scheduler. */
20727 ia32_multipass_dfa_lookahead (void)
20731 case PROCESSOR_PENTIUM:
20734 case PROCESSOR_PENTIUMPRO:
20744 /* Compute the alignment given to a constant that is being placed in memory.
20745 EXP is the constant and ALIGN is the alignment that the object would
20746 ordinarily have.
20747 The value of this function is used instead of that alignment to align
20748 the object. */
20751 ix86_constant_alignment (tree exp, int align)
20753 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20754 || TREE_CODE (exp) == INTEGER_CST)
20756 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20758 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20761 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20762 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20763 return BITS_PER_WORD;
20768 /* Compute the alignment for a static variable.
20769 TYPE is the data type, and ALIGN is the alignment that
20770 the object would ordinarily have. The value of this function is used
20771 instead of that alignment to align the object. */
20774 ix86_data_alignment (tree type, int align)
20776 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20778 if (AGGREGATE_TYPE_P (type)
20779 && TYPE_SIZE (type)
20780 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20781 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20782 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20783 && align < max_align)
20786 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
20787 to a 16-byte boundary. */
20790 if (AGGREGATE_TYPE_P (type)
20791 && TYPE_SIZE (type)
20792 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20793 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20794 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20798 if (TREE_CODE (type) == ARRAY_TYPE)
20800 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20802 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20805 else if (TREE_CODE (type) == COMPLEX_TYPE)
20808 if (TYPE_MODE (type) == DCmode && align < 64)
20810 if ((TYPE_MODE (type) == XCmode
20811 || TYPE_MODE (type) == TCmode) && align < 128)
20814 else if ((TREE_CODE (type) == RECORD_TYPE
20815 || TREE_CODE (type) == UNION_TYPE
20816 || TREE_CODE (type) == QUAL_UNION_TYPE)
20817 && TYPE_FIELDS (type))
20819 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20821 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20824 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20825 || TREE_CODE (type) == INTEGER_TYPE)
20827 if (TYPE_MODE (type) == DFmode && align < 64)
20829 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20836 /* Compute the alignment for a local variable or a stack slot. EXP is
20837 the data type or decl itself, MODE is the widest mode available and
20838 ALIGN is the alignment that the object would ordinarily have. The
20839 value of this macro is used instead of that alignment to align the
20840 object. */
20843 ix86_local_alignment (tree exp, enum machine_mode mode,
20844 unsigned int align)
20848 if (exp && DECL_P (exp))
20850 type = TREE_TYPE (exp);
20859 /* Don't do dynamic stack realignment for long long objects with
20860 -mpreferred-stack-boundary=2. */
20863 && ix86_preferred_stack_boundary < 64
20864 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20865 && (!type || !TYPE_USER_ALIGN (type))
20866 && (!decl || !DECL_USER_ALIGN (decl)))
20869 /* If TYPE is NULL, we are allocating a stack slot for caller-save
20870 register in MODE. We will return the largest alignment of XF
20871 and DF. */
20874 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20875 align = GET_MODE_ALIGNMENT (DFmode);
20879 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
20880 to a 16-byte boundary. The exact wording is:
20882 An array uses the same alignment as its elements, except that a local or
20883 global array variable of length at least 16 bytes or
20884 a C99 variable-length array variable always has alignment of at least 16 bytes.
20886 This was added to allow the use of aligned SSE instructions on arrays. The
20887 rule is meant for static storage (where the compiler cannot do the analysis
20888 by itself). We follow it for automatic variables only when convenient:
20889 we fully control everything in the function being compiled, and functions
20890 from other units cannot rely on the alignment.
20892 Exclude the va_list type. It is the common case of a local array where
20893 we cannot benefit from the alignment. */
20894 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
20897 if (AGGREGATE_TYPE_P (type)
20898 && (TYPE_MAIN_VARIANT (type)
20899 != TYPE_MAIN_VARIANT (va_list_type_node))
20900 && TYPE_SIZE (type)
20901 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20902 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20903 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20906 if (TREE_CODE (type) == ARRAY_TYPE)
20908 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20910 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20913 else if (TREE_CODE (type) == COMPLEX_TYPE)
20915 if (TYPE_MODE (type) == DCmode && align < 64)
20917 if ((TYPE_MODE (type) == XCmode
20918 || TYPE_MODE (type) == TCmode) && align < 128)
20921 else if ((TREE_CODE (type) == RECORD_TYPE
20922 || TREE_CODE (type) == UNION_TYPE
20923 || TREE_CODE (type) == QUAL_UNION_TYPE)
20924 && TYPE_FIELDS (type))
20926 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20928 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20931 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20932 || TREE_CODE (type) == INTEGER_TYPE)
20935 if (TYPE_MODE (type) == DFmode && align < 64)
20937 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20943 /* Compute the minimum required alignment for dynamic stack realignment
20944 purposes for a local variable, parameter or a stack slot. EXP is
20945 the data type or decl itself, MODE is its mode and ALIGN is the
20946 alignment that the object would ordinarily have. */
20949 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20950 unsigned int align)
20954 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20957 if (exp && DECL_P (exp))
20959 type = TREE_TYPE (exp);
20968 /* Don't do dynamic stack realignment for long long objects with
20969 -mpreferred-stack-boundary=2. */
20970 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20971 && (!type || !TYPE_USER_ALIGN (type))
20972 && (!decl || !DECL_USER_ALIGN (decl)))
20978 /* Find a location for the static chain incoming to a nested function.
20979 This is a register, unless all free registers are used by arguments. */
20982 ix86_static_chain (const_tree fndecl, bool incoming_p)
20986 if (!DECL_STATIC_CHAIN (fndecl))
20987 return NULL;
20991 /* We always use R10 in 64-bit mode. */
20997 /* By default in 32-bit mode we use ECX to pass the static chain. */
21000 fntype = TREE_TYPE (fndecl);
21001 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21003 /* Fastcall functions use ecx/edx for arguments, which leaves
21004 us with EAX for the static chain. */
21007 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21009 /* Thiscall functions use ecx for arguments, which leaves
21010 us with EAX for the static chain. */
21013 else if (ix86_function_regparm (fntype, fndecl) == 3)
21015 /* For regparm 3, we have no free call-clobbered registers in
21016 which to store the static chain. In order to implement this,
21017 we have the trampoline push the static chain to the stack.
21018 However, we can't push a value below the return address when
21019 we call the nested function directly, so we have to use an
21020 alternate entry point. For this we use ESI, and have the
21021 alternate entry point push ESI, so that things appear the
21022 same once we're executing the nested function. */
21025 if (fndecl == current_function_decl)
21026 ix86_static_chain_on_stack = true;
21027 return gen_frame_mem (SImode,
21028 plus_constant (arg_pointer_rtx, -8));
21034 return gen_rtx_REG (Pmode, regno);
21037 /* Emit RTL insns to initialize the variable parts of a trampoline.
21038 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21039 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21040 to be passed to the target function. */
21043 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21047 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21054 /* Depending on the static chain location, either load a register
21055 with a constant, or push the constant to the stack. All of the
21056 instructions are the same size. */
21057 chain = ix86_static_chain (fndecl, true);
21060 if (REGNO (chain) == CX_REG)
21061 opcode = 0xb9;
21062 else if (REGNO (chain) == AX_REG)
21063 opcode = 0xb8;
21064 else
21065 gcc_unreachable ();
21070 mem = adjust_address (m_tramp, QImode, 0);
21071 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21073 mem = adjust_address (m_tramp, SImode, 1);
21074 emit_move_insn (mem, chain_value);
21076 /* Compute offset from the end of the jmp to the target function.
21077 In the case in which the trampoline stores the static chain on
21078 the stack, we need to skip the first insn which pushes the
21079 (call-saved) register static chain; this push is 1 byte. */
21080 disp = expand_binop (SImode, sub_optab, fnaddr,
21081 plus_constant (XEXP (m_tramp, 0),
21082 MEM_P (chain) ? 9 : 10),
21083 NULL_RTX, 1, OPTAB_DIRECT);
21085 mem = adjust_address (m_tramp, QImode, 5);
21086 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21088 mem = adjust_address (m_tramp, SImode, 6);
21089 emit_move_insn (mem, disp);
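/* The stores above assemble this 10-byte ia32 trampoline (layout shown
   for illustration):

     offset 0:  b9 / b8 / 68   movl $chain,%ecx / movl $chain,%eax /
                               pushl $chain
     offset 1:  <chain_value>  4 bytes
     offset 5:  e9             jmp rel32
     offset 6:  <disp>         rel32 to the target, biased to skip the
                               1-byte entry push when the chain was
                               pushed on the stack  */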
21095 /* Load the function address into r11. Try to load the address using
21096 the shorter movl instead of movabs. We may want to support
21097 movq for kernel mode, but the kernel does not use trampolines at
21098 the moment. */
21099 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21101 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21103 mem = adjust_address (m_tramp, HImode, offset);
21104 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21106 mem = adjust_address (m_tramp, SImode, offset + 2);
21107 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21112 mem = adjust_address (m_tramp, HImode, offset);
21113 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21115 mem = adjust_address (m_tramp, DImode, offset + 2);
21116 emit_move_insn (mem, fnaddr);
21120 /* Load static chain using movabs to r10. */
21121 mem = adjust_address (m_tramp, HImode, offset);
21122 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21124 mem = adjust_address (m_tramp, DImode, offset + 2);
21125 emit_move_insn (mem, chain_value);
21128 /* Jump to r11; the last (unused) byte is a nop, only there to
21129 pad the write out to a single 32-bit store. */
21130 mem = adjust_address (m_tramp, SImode, offset);
21131 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
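/* The resulting 64-bit trampoline, in its long-mov form (byte layout
   shown for illustration; the short form replaces the first insn with
   41 bb <imm32>, i.e. movl $fnaddr,%r11d):

     49 bb <imm64>   movabs $fnaddr, %r11
     49 ba <imm64>   movabs $chain,  %r10
     49 ff e3        jmp    *%r11
     90              nop    (pads the final write to a full 32-bit store)  */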
21134 gcc_assert (offset <= TRAMPOLINE_SIZE);
21137 #ifdef ENABLE_EXECUTE_STACK
21138 #ifdef CHECK_EXECUTE_STACK_ENABLED
21139 if (CHECK_EXECUTE_STACK_ENABLED)
21141 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21142 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21146 /* The following file contains several enumerations and data structures
21147 built from the definitions in i386-builtin-types.def. */
21149 #include "i386-builtin-types.inc"
21151 /* Table for the ix86 builtin non-function types. */
21152 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21154 /* Retrieve an element from the above table, building some of
21155 the types lazily. */
21158 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21160 unsigned int index;
21163 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21165 type = ix86_builtin_type_tab[(int) tcode];
21169 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21170 if (tcode <= IX86_BT_LAST_VECT)
21172 enum machine_mode mode;
21174 index = tcode - IX86_BT_LAST_PRIM - 1;
21175 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21176 mode = ix86_builtin_type_vect_mode[index];
21178 type = build_vector_type_for_mode (itype, mode);
21184 index = tcode - IX86_BT_LAST_VECT - 1;
21185 if (tcode <= IX86_BT_LAST_PTR)
21186 quals = TYPE_UNQUALIFIED;
21188 quals = TYPE_QUAL_CONST;
21190 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21191 if (quals != TYPE_UNQUALIFIED)
21192 itype = build_qualified_type (itype, quals);
21194 type = build_pointer_type (itype);
21197 ix86_builtin_type_tab[(int) tcode] = type;
21201 /* Table for the ix86 builtin function types. */
21202 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21204 /* Retrieve an element from the above table, building some of
21205 the types lazily. */
21208 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21212 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21214 type = ix86_builtin_func_type_tab[(int) tcode];
21218 if (tcode <= IX86_BT_LAST_FUNC)
21220 unsigned start = ix86_builtin_func_start[(int) tcode];
21221 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21222 tree rtype, atype, args = void_list_node;
21225 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21226 for (i = after - 1; i > start; --i)
21228 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21229 args = tree_cons (NULL, atype, args);
21232 type = build_function_type (rtype, args);
21236 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21237 enum ix86_builtin_func_type icode;
21239 icode = ix86_builtin_func_alias_base[index];
21240 type = ix86_get_builtin_func_type (icode);
21243 ix86_builtin_func_type_tab[(int) tcode] = type;
21248 /* Codes for all the SSE/MMX builtins. */
21251 IX86_BUILTIN_ADDPS,
21252 IX86_BUILTIN_ADDSS,
21253 IX86_BUILTIN_DIVPS,
21254 IX86_BUILTIN_DIVSS,
21255 IX86_BUILTIN_MULPS,
21256 IX86_BUILTIN_MULSS,
21257 IX86_BUILTIN_SUBPS,
21258 IX86_BUILTIN_SUBSS,
21260 IX86_BUILTIN_CMPEQPS,
21261 IX86_BUILTIN_CMPLTPS,
21262 IX86_BUILTIN_CMPLEPS,
21263 IX86_BUILTIN_CMPGTPS,
21264 IX86_BUILTIN_CMPGEPS,
21265 IX86_BUILTIN_CMPNEQPS,
21266 IX86_BUILTIN_CMPNLTPS,
21267 IX86_BUILTIN_CMPNLEPS,
21268 IX86_BUILTIN_CMPNGTPS,
21269 IX86_BUILTIN_CMPNGEPS,
21270 IX86_BUILTIN_CMPORDPS,
21271 IX86_BUILTIN_CMPUNORDPS,
21272 IX86_BUILTIN_CMPEQSS,
21273 IX86_BUILTIN_CMPLTSS,
21274 IX86_BUILTIN_CMPLESS,
21275 IX86_BUILTIN_CMPNEQSS,
21276 IX86_BUILTIN_CMPNLTSS,
21277 IX86_BUILTIN_CMPNLESS,
21278 IX86_BUILTIN_CMPNGTSS,
21279 IX86_BUILTIN_CMPNGESS,
21280 IX86_BUILTIN_CMPORDSS,
21281 IX86_BUILTIN_CMPUNORDSS,
21283 IX86_BUILTIN_COMIEQSS,
21284 IX86_BUILTIN_COMILTSS,
21285 IX86_BUILTIN_COMILESS,
21286 IX86_BUILTIN_COMIGTSS,
21287 IX86_BUILTIN_COMIGESS,
21288 IX86_BUILTIN_COMINEQSS,
21289 IX86_BUILTIN_UCOMIEQSS,
21290 IX86_BUILTIN_UCOMILTSS,
21291 IX86_BUILTIN_UCOMILESS,
21292 IX86_BUILTIN_UCOMIGTSS,
21293 IX86_BUILTIN_UCOMIGESS,
21294 IX86_BUILTIN_UCOMINEQSS,
21296 IX86_BUILTIN_CVTPI2PS,
21297 IX86_BUILTIN_CVTPS2PI,
21298 IX86_BUILTIN_CVTSI2SS,
21299 IX86_BUILTIN_CVTSI642SS,
21300 IX86_BUILTIN_CVTSS2SI,
21301 IX86_BUILTIN_CVTSS2SI64,
21302 IX86_BUILTIN_CVTTPS2PI,
21303 IX86_BUILTIN_CVTTSS2SI,
21304 IX86_BUILTIN_CVTTSS2SI64,
21306 IX86_BUILTIN_MAXPS,
21307 IX86_BUILTIN_MAXSS,
21308 IX86_BUILTIN_MINPS,
21309 IX86_BUILTIN_MINSS,
21311 IX86_BUILTIN_LOADUPS,
21312 IX86_BUILTIN_STOREUPS,
21313 IX86_BUILTIN_MOVSS,
21315 IX86_BUILTIN_MOVHLPS,
21316 IX86_BUILTIN_MOVLHPS,
21317 IX86_BUILTIN_LOADHPS,
21318 IX86_BUILTIN_LOADLPS,
21319 IX86_BUILTIN_STOREHPS,
21320 IX86_BUILTIN_STORELPS,
21322 IX86_BUILTIN_MASKMOVQ,
21323 IX86_BUILTIN_MOVMSKPS,
21324 IX86_BUILTIN_PMOVMSKB,
21326 IX86_BUILTIN_MOVNTPS,
21327 IX86_BUILTIN_MOVNTQ,
21329 IX86_BUILTIN_LOADDQU,
21330 IX86_BUILTIN_STOREDQU,
21332 IX86_BUILTIN_PACKSSWB,
21333 IX86_BUILTIN_PACKSSDW,
21334 IX86_BUILTIN_PACKUSWB,
21336 IX86_BUILTIN_PADDB,
21337 IX86_BUILTIN_PADDW,
21338 IX86_BUILTIN_PADDD,
21339 IX86_BUILTIN_PADDQ,
21340 IX86_BUILTIN_PADDSB,
21341 IX86_BUILTIN_PADDSW,
21342 IX86_BUILTIN_PADDUSB,
21343 IX86_BUILTIN_PADDUSW,
21344 IX86_BUILTIN_PSUBB,
21345 IX86_BUILTIN_PSUBW,
21346 IX86_BUILTIN_PSUBD,
21347 IX86_BUILTIN_PSUBQ,
21348 IX86_BUILTIN_PSUBSB,
21349 IX86_BUILTIN_PSUBSW,
21350 IX86_BUILTIN_PSUBUSB,
21351 IX86_BUILTIN_PSUBUSW,
21354 IX86_BUILTIN_PANDN,
21358 IX86_BUILTIN_PAVGB,
21359 IX86_BUILTIN_PAVGW,
21361 IX86_BUILTIN_PCMPEQB,
21362 IX86_BUILTIN_PCMPEQW,
21363 IX86_BUILTIN_PCMPEQD,
21364 IX86_BUILTIN_PCMPGTB,
21365 IX86_BUILTIN_PCMPGTW,
21366 IX86_BUILTIN_PCMPGTD,
21368 IX86_BUILTIN_PMADDWD,
21370 IX86_BUILTIN_PMAXSW,
21371 IX86_BUILTIN_PMAXUB,
21372 IX86_BUILTIN_PMINSW,
21373 IX86_BUILTIN_PMINUB,
21375 IX86_BUILTIN_PMULHUW,
21376 IX86_BUILTIN_PMULHW,
21377 IX86_BUILTIN_PMULLW,
21379 IX86_BUILTIN_PSADBW,
21380 IX86_BUILTIN_PSHUFW,
21382 IX86_BUILTIN_PSLLW,
21383 IX86_BUILTIN_PSLLD,
21384 IX86_BUILTIN_PSLLQ,
21385 IX86_BUILTIN_PSRAW,
21386 IX86_BUILTIN_PSRAD,
21387 IX86_BUILTIN_PSRLW,
21388 IX86_BUILTIN_PSRLD,
21389 IX86_BUILTIN_PSRLQ,
21390 IX86_BUILTIN_PSLLWI,
21391 IX86_BUILTIN_PSLLDI,
21392 IX86_BUILTIN_PSLLQI,
21393 IX86_BUILTIN_PSRAWI,
21394 IX86_BUILTIN_PSRADI,
21395 IX86_BUILTIN_PSRLWI,
21396 IX86_BUILTIN_PSRLDI,
21397 IX86_BUILTIN_PSRLQI,
21399 IX86_BUILTIN_PUNPCKHBW,
21400 IX86_BUILTIN_PUNPCKHWD,
21401 IX86_BUILTIN_PUNPCKHDQ,
21402 IX86_BUILTIN_PUNPCKLBW,
21403 IX86_BUILTIN_PUNPCKLWD,
21404 IX86_BUILTIN_PUNPCKLDQ,
21406 IX86_BUILTIN_SHUFPS,
21408 IX86_BUILTIN_RCPPS,
21409 IX86_BUILTIN_RCPSS,
21410 IX86_BUILTIN_RSQRTPS,
21411 IX86_BUILTIN_RSQRTPS_NR,
21412 IX86_BUILTIN_RSQRTSS,
21413 IX86_BUILTIN_RSQRTF,
21414 IX86_BUILTIN_SQRTPS,
21415 IX86_BUILTIN_SQRTPS_NR,
21416 IX86_BUILTIN_SQRTSS,
21418 IX86_BUILTIN_UNPCKHPS,
21419 IX86_BUILTIN_UNPCKLPS,
21421 IX86_BUILTIN_ANDPS,
21422 IX86_BUILTIN_ANDNPS,
21424 IX86_BUILTIN_XORPS,
21427 IX86_BUILTIN_LDMXCSR,
21428 IX86_BUILTIN_STMXCSR,
21429 IX86_BUILTIN_SFENCE,
21431 /* 3DNow! Original */
21432 IX86_BUILTIN_FEMMS,
21433 IX86_BUILTIN_PAVGUSB,
21434 IX86_BUILTIN_PF2ID,
21435 IX86_BUILTIN_PFACC,
21436 IX86_BUILTIN_PFADD,
21437 IX86_BUILTIN_PFCMPEQ,
21438 IX86_BUILTIN_PFCMPGE,
21439 IX86_BUILTIN_PFCMPGT,
21440 IX86_BUILTIN_PFMAX,
21441 IX86_BUILTIN_PFMIN,
21442 IX86_BUILTIN_PFMUL,
21443 IX86_BUILTIN_PFRCP,
21444 IX86_BUILTIN_PFRCPIT1,
21445 IX86_BUILTIN_PFRCPIT2,
21446 IX86_BUILTIN_PFRSQIT1,
21447 IX86_BUILTIN_PFRSQRT,
21448 IX86_BUILTIN_PFSUB,
21449 IX86_BUILTIN_PFSUBR,
21450 IX86_BUILTIN_PI2FD,
21451 IX86_BUILTIN_PMULHRW,
21453 /* 3DNow! Athlon Extensions */
21454 IX86_BUILTIN_PF2IW,
21455 IX86_BUILTIN_PFNACC,
21456 IX86_BUILTIN_PFPNACC,
21457 IX86_BUILTIN_PI2FW,
21458 IX86_BUILTIN_PSWAPDSI,
21459 IX86_BUILTIN_PSWAPDSF,
21462 IX86_BUILTIN_ADDPD,
21463 IX86_BUILTIN_ADDSD,
21464 IX86_BUILTIN_DIVPD,
21465 IX86_BUILTIN_DIVSD,
21466 IX86_BUILTIN_MULPD,
21467 IX86_BUILTIN_MULSD,
21468 IX86_BUILTIN_SUBPD,
21469 IX86_BUILTIN_SUBSD,
21471 IX86_BUILTIN_CMPEQPD,
21472 IX86_BUILTIN_CMPLTPD,
21473 IX86_BUILTIN_CMPLEPD,
21474 IX86_BUILTIN_CMPGTPD,
21475 IX86_BUILTIN_CMPGEPD,
21476 IX86_BUILTIN_CMPNEQPD,
21477 IX86_BUILTIN_CMPNLTPD,
21478 IX86_BUILTIN_CMPNLEPD,
21479 IX86_BUILTIN_CMPNGTPD,
21480 IX86_BUILTIN_CMPNGEPD,
21481 IX86_BUILTIN_CMPORDPD,
21482 IX86_BUILTIN_CMPUNORDPD,
21483 IX86_BUILTIN_CMPEQSD,
21484 IX86_BUILTIN_CMPLTSD,
21485 IX86_BUILTIN_CMPLESD,
21486 IX86_BUILTIN_CMPNEQSD,
21487 IX86_BUILTIN_CMPNLTSD,
21488 IX86_BUILTIN_CMPNLESD,
21489 IX86_BUILTIN_CMPORDSD,
21490 IX86_BUILTIN_CMPUNORDSD,
21492 IX86_BUILTIN_COMIEQSD,
21493 IX86_BUILTIN_COMILTSD,
21494 IX86_BUILTIN_COMILESD,
21495 IX86_BUILTIN_COMIGTSD,
21496 IX86_BUILTIN_COMIGESD,
21497 IX86_BUILTIN_COMINEQSD,
21498 IX86_BUILTIN_UCOMIEQSD,
21499 IX86_BUILTIN_UCOMILTSD,
21500 IX86_BUILTIN_UCOMILESD,
21501 IX86_BUILTIN_UCOMIGTSD,
21502 IX86_BUILTIN_UCOMIGESD,
21503 IX86_BUILTIN_UCOMINEQSD,
21505 IX86_BUILTIN_MAXPD,
21506 IX86_BUILTIN_MAXSD,
21507 IX86_BUILTIN_MINPD,
21508 IX86_BUILTIN_MINSD,
21510 IX86_BUILTIN_ANDPD,
21511 IX86_BUILTIN_ANDNPD,
21513 IX86_BUILTIN_XORPD,
21515 IX86_BUILTIN_SQRTPD,
21516 IX86_BUILTIN_SQRTSD,
21518 IX86_BUILTIN_UNPCKHPD,
21519 IX86_BUILTIN_UNPCKLPD,
21521 IX86_BUILTIN_SHUFPD,
21523 IX86_BUILTIN_LOADUPD,
21524 IX86_BUILTIN_STOREUPD,
21525 IX86_BUILTIN_MOVSD,
21527 IX86_BUILTIN_LOADHPD,
21528 IX86_BUILTIN_LOADLPD,
21530 IX86_BUILTIN_CVTDQ2PD,
21531 IX86_BUILTIN_CVTDQ2PS,
21533 IX86_BUILTIN_CVTPD2DQ,
21534 IX86_BUILTIN_CVTPD2PI,
21535 IX86_BUILTIN_CVTPD2PS,
21536 IX86_BUILTIN_CVTTPD2DQ,
21537 IX86_BUILTIN_CVTTPD2PI,
21539 IX86_BUILTIN_CVTPI2PD,
21540 IX86_BUILTIN_CVTSI2SD,
21541 IX86_BUILTIN_CVTSI642SD,
21543 IX86_BUILTIN_CVTSD2SI,
21544 IX86_BUILTIN_CVTSD2SI64,
21545 IX86_BUILTIN_CVTSD2SS,
21546 IX86_BUILTIN_CVTSS2SD,
21547 IX86_BUILTIN_CVTTSD2SI,
21548 IX86_BUILTIN_CVTTSD2SI64,
21550 IX86_BUILTIN_CVTPS2DQ,
21551 IX86_BUILTIN_CVTPS2PD,
21552 IX86_BUILTIN_CVTTPS2DQ,
21554 IX86_BUILTIN_MOVNTI,
21555 IX86_BUILTIN_MOVNTPD,
21556 IX86_BUILTIN_MOVNTDQ,
21558 IX86_BUILTIN_MOVQ128,
21561 IX86_BUILTIN_MASKMOVDQU,
21562 IX86_BUILTIN_MOVMSKPD,
21563 IX86_BUILTIN_PMOVMSKB128,
21565 IX86_BUILTIN_PACKSSWB128,
21566 IX86_BUILTIN_PACKSSDW128,
21567 IX86_BUILTIN_PACKUSWB128,
21569 IX86_BUILTIN_PADDB128,
21570 IX86_BUILTIN_PADDW128,
21571 IX86_BUILTIN_PADDD128,
21572 IX86_BUILTIN_PADDQ128,
21573 IX86_BUILTIN_PADDSB128,
21574 IX86_BUILTIN_PADDSW128,
21575 IX86_BUILTIN_PADDUSB128,
21576 IX86_BUILTIN_PADDUSW128,
21577 IX86_BUILTIN_PSUBB128,
21578 IX86_BUILTIN_PSUBW128,
21579 IX86_BUILTIN_PSUBD128,
21580 IX86_BUILTIN_PSUBQ128,
21581 IX86_BUILTIN_PSUBSB128,
21582 IX86_BUILTIN_PSUBSW128,
21583 IX86_BUILTIN_PSUBUSB128,
21584 IX86_BUILTIN_PSUBUSW128,
21586 IX86_BUILTIN_PAND128,
21587 IX86_BUILTIN_PANDN128,
21588 IX86_BUILTIN_POR128,
21589 IX86_BUILTIN_PXOR128,
21591 IX86_BUILTIN_PAVGB128,
21592 IX86_BUILTIN_PAVGW128,
21594 IX86_BUILTIN_PCMPEQB128,
21595 IX86_BUILTIN_PCMPEQW128,
21596 IX86_BUILTIN_PCMPEQD128,
21597 IX86_BUILTIN_PCMPGTB128,
21598 IX86_BUILTIN_PCMPGTW128,
21599 IX86_BUILTIN_PCMPGTD128,
21601 IX86_BUILTIN_PMADDWD128,
21603 IX86_BUILTIN_PMAXSW128,
21604 IX86_BUILTIN_PMAXUB128,
21605 IX86_BUILTIN_PMINSW128,
21606 IX86_BUILTIN_PMINUB128,
21608 IX86_BUILTIN_PMULUDQ,
21609 IX86_BUILTIN_PMULUDQ128,
21610 IX86_BUILTIN_PMULHUW128,
21611 IX86_BUILTIN_PMULHW128,
21612 IX86_BUILTIN_PMULLW128,
21614 IX86_BUILTIN_PSADBW128,
21615 IX86_BUILTIN_PSHUFHW,
21616 IX86_BUILTIN_PSHUFLW,
21617 IX86_BUILTIN_PSHUFD,
21619 IX86_BUILTIN_PSLLDQI128,
21620 IX86_BUILTIN_PSLLWI128,
21621 IX86_BUILTIN_PSLLDI128,
21622 IX86_BUILTIN_PSLLQI128,
21623 IX86_BUILTIN_PSRAWI128,
21624 IX86_BUILTIN_PSRADI128,
21625 IX86_BUILTIN_PSRLDQI128,
21626 IX86_BUILTIN_PSRLWI128,
21627 IX86_BUILTIN_PSRLDI128,
21628 IX86_BUILTIN_PSRLQI128,
21630 IX86_BUILTIN_PSLLDQ128,
21631 IX86_BUILTIN_PSLLW128,
21632 IX86_BUILTIN_PSLLD128,
21633 IX86_BUILTIN_PSLLQ128,
21634 IX86_BUILTIN_PSRAW128,
21635 IX86_BUILTIN_PSRAD128,
21636 IX86_BUILTIN_PSRLW128,
21637 IX86_BUILTIN_PSRLD128,
21638 IX86_BUILTIN_PSRLQ128,
21640 IX86_BUILTIN_PUNPCKHBW128,
21641 IX86_BUILTIN_PUNPCKHWD128,
21642 IX86_BUILTIN_PUNPCKHDQ128,
21643 IX86_BUILTIN_PUNPCKHQDQ128,
21644 IX86_BUILTIN_PUNPCKLBW128,
21645 IX86_BUILTIN_PUNPCKLWD128,
21646 IX86_BUILTIN_PUNPCKLDQ128,
21647 IX86_BUILTIN_PUNPCKLQDQ128,
21649 IX86_BUILTIN_CLFLUSH,
21650 IX86_BUILTIN_MFENCE,
21651 IX86_BUILTIN_LFENCE,
21653 IX86_BUILTIN_BSRSI,
21654 IX86_BUILTIN_BSRDI,
21655 IX86_BUILTIN_RDPMC,
21656 IX86_BUILTIN_RDTSC,
21657 IX86_BUILTIN_RDTSCP,
21658 IX86_BUILTIN_ROLQI,
21659 IX86_BUILTIN_ROLHI,
21660 IX86_BUILTIN_RORQI,
21661 IX86_BUILTIN_RORHI,
21664 IX86_BUILTIN_ADDSUBPS,
21665 IX86_BUILTIN_HADDPS,
21666 IX86_BUILTIN_HSUBPS,
21667 IX86_BUILTIN_MOVSHDUP,
21668 IX86_BUILTIN_MOVSLDUP,
21669 IX86_BUILTIN_ADDSUBPD,
21670 IX86_BUILTIN_HADDPD,
21671 IX86_BUILTIN_HSUBPD,
21672 IX86_BUILTIN_LDDQU,
21674 IX86_BUILTIN_MONITOR,
21675 IX86_BUILTIN_MWAIT,
21678 IX86_BUILTIN_PHADDW,
21679 IX86_BUILTIN_PHADDD,
21680 IX86_BUILTIN_PHADDSW,
21681 IX86_BUILTIN_PHSUBW,
21682 IX86_BUILTIN_PHSUBD,
21683 IX86_BUILTIN_PHSUBSW,
21684 IX86_BUILTIN_PMADDUBSW,
21685 IX86_BUILTIN_PMULHRSW,
21686 IX86_BUILTIN_PSHUFB,
21687 IX86_BUILTIN_PSIGNB,
21688 IX86_BUILTIN_PSIGNW,
21689 IX86_BUILTIN_PSIGND,
21690 IX86_BUILTIN_PALIGNR,
21691 IX86_BUILTIN_PABSB,
21692 IX86_BUILTIN_PABSW,
21693 IX86_BUILTIN_PABSD,
21695 IX86_BUILTIN_PHADDW128,
21696 IX86_BUILTIN_PHADDD128,
21697 IX86_BUILTIN_PHADDSW128,
21698 IX86_BUILTIN_PHSUBW128,
21699 IX86_BUILTIN_PHSUBD128,
21700 IX86_BUILTIN_PHSUBSW128,
21701 IX86_BUILTIN_PMADDUBSW128,
21702 IX86_BUILTIN_PMULHRSW128,
21703 IX86_BUILTIN_PSHUFB128,
21704 IX86_BUILTIN_PSIGNB128,
21705 IX86_BUILTIN_PSIGNW128,
21706 IX86_BUILTIN_PSIGND128,
21707 IX86_BUILTIN_PALIGNR128,
21708 IX86_BUILTIN_PABSB128,
21709 IX86_BUILTIN_PABSW128,
21710 IX86_BUILTIN_PABSD128,
21712 /* AMDFAM10 - SSE4A New Instructions. */
21713 IX86_BUILTIN_MOVNTSD,
21714 IX86_BUILTIN_MOVNTSS,
21715 IX86_BUILTIN_EXTRQI,
21716 IX86_BUILTIN_EXTRQ,
21717 IX86_BUILTIN_INSERTQI,
21718 IX86_BUILTIN_INSERTQ,
21721 IX86_BUILTIN_BLENDPD,
21722 IX86_BUILTIN_BLENDPS,
21723 IX86_BUILTIN_BLENDVPD,
21724 IX86_BUILTIN_BLENDVPS,
21725 IX86_BUILTIN_PBLENDVB128,
21726 IX86_BUILTIN_PBLENDW128,
21731 IX86_BUILTIN_INSERTPS128,
21733 IX86_BUILTIN_MOVNTDQA,
21734 IX86_BUILTIN_MPSADBW128,
21735 IX86_BUILTIN_PACKUSDW128,
21736 IX86_BUILTIN_PCMPEQQ,
21737 IX86_BUILTIN_PHMINPOSUW128,
21739 IX86_BUILTIN_PMAXSB128,
21740 IX86_BUILTIN_PMAXSD128,
21741 IX86_BUILTIN_PMAXUD128,
21742 IX86_BUILTIN_PMAXUW128,
21744 IX86_BUILTIN_PMINSB128,
21745 IX86_BUILTIN_PMINSD128,
21746 IX86_BUILTIN_PMINUD128,
21747 IX86_BUILTIN_PMINUW128,
21749 IX86_BUILTIN_PMOVSXBW128,
21750 IX86_BUILTIN_PMOVSXBD128,
21751 IX86_BUILTIN_PMOVSXBQ128,
21752 IX86_BUILTIN_PMOVSXWD128,
21753 IX86_BUILTIN_PMOVSXWQ128,
21754 IX86_BUILTIN_PMOVSXDQ128,
21756 IX86_BUILTIN_PMOVZXBW128,
21757 IX86_BUILTIN_PMOVZXBD128,
21758 IX86_BUILTIN_PMOVZXBQ128,
21759 IX86_BUILTIN_PMOVZXWD128,
21760 IX86_BUILTIN_PMOVZXWQ128,
21761 IX86_BUILTIN_PMOVZXDQ128,
21763 IX86_BUILTIN_PMULDQ128,
21764 IX86_BUILTIN_PMULLD128,
21766 IX86_BUILTIN_ROUNDPD,
21767 IX86_BUILTIN_ROUNDPS,
21768 IX86_BUILTIN_ROUNDSD,
21769 IX86_BUILTIN_ROUNDSS,
21771 IX86_BUILTIN_PTESTZ,
21772 IX86_BUILTIN_PTESTC,
21773 IX86_BUILTIN_PTESTNZC,
21775 IX86_BUILTIN_VEC_INIT_V2SI,
21776 IX86_BUILTIN_VEC_INIT_V4HI,
21777 IX86_BUILTIN_VEC_INIT_V8QI,
21778 IX86_BUILTIN_VEC_EXT_V2DF,
21779 IX86_BUILTIN_VEC_EXT_V2DI,
21780 IX86_BUILTIN_VEC_EXT_V4SF,
21781 IX86_BUILTIN_VEC_EXT_V4SI,
21782 IX86_BUILTIN_VEC_EXT_V8HI,
21783 IX86_BUILTIN_VEC_EXT_V2SI,
21784 IX86_BUILTIN_VEC_EXT_V4HI,
21785 IX86_BUILTIN_VEC_EXT_V16QI,
21786 IX86_BUILTIN_VEC_SET_V2DI,
21787 IX86_BUILTIN_VEC_SET_V4SF,
21788 IX86_BUILTIN_VEC_SET_V4SI,
21789 IX86_BUILTIN_VEC_SET_V8HI,
21790 IX86_BUILTIN_VEC_SET_V4HI,
21791 IX86_BUILTIN_VEC_SET_V16QI,
21793 IX86_BUILTIN_VEC_PACK_SFIX,
21796 IX86_BUILTIN_CRC32QI,
21797 IX86_BUILTIN_CRC32HI,
21798 IX86_BUILTIN_CRC32SI,
21799 IX86_BUILTIN_CRC32DI,
21801 IX86_BUILTIN_PCMPESTRI128,
21802 IX86_BUILTIN_PCMPESTRM128,
21803 IX86_BUILTIN_PCMPESTRA128,
21804 IX86_BUILTIN_PCMPESTRC128,
21805 IX86_BUILTIN_PCMPESTRO128,
21806 IX86_BUILTIN_PCMPESTRS128,
21807 IX86_BUILTIN_PCMPESTRZ128,
21808 IX86_BUILTIN_PCMPISTRI128,
21809 IX86_BUILTIN_PCMPISTRM128,
21810 IX86_BUILTIN_PCMPISTRA128,
21811 IX86_BUILTIN_PCMPISTRC128,
21812 IX86_BUILTIN_PCMPISTRO128,
21813 IX86_BUILTIN_PCMPISTRS128,
21814 IX86_BUILTIN_PCMPISTRZ128,
21816 IX86_BUILTIN_PCMPGTQ,
21818 /* AES instructions */
21819 IX86_BUILTIN_AESENC128,
21820 IX86_BUILTIN_AESENCLAST128,
21821 IX86_BUILTIN_AESDEC128,
21822 IX86_BUILTIN_AESDECLAST128,
21823 IX86_BUILTIN_AESIMC128,
21824 IX86_BUILTIN_AESKEYGENASSIST128,
21826 /* PCLMUL instruction */
21827 IX86_BUILTIN_PCLMULQDQ128,
21830 IX86_BUILTIN_ADDPD256,
21831 IX86_BUILTIN_ADDPS256,
21832 IX86_BUILTIN_ADDSUBPD256,
21833 IX86_BUILTIN_ADDSUBPS256,
21834 IX86_BUILTIN_ANDPD256,
21835 IX86_BUILTIN_ANDPS256,
21836 IX86_BUILTIN_ANDNPD256,
21837 IX86_BUILTIN_ANDNPS256,
21838 IX86_BUILTIN_BLENDPD256,
21839 IX86_BUILTIN_BLENDPS256,
21840 IX86_BUILTIN_BLENDVPD256,
21841 IX86_BUILTIN_BLENDVPS256,
21842 IX86_BUILTIN_DIVPD256,
21843 IX86_BUILTIN_DIVPS256,
21844 IX86_BUILTIN_DPPS256,
21845 IX86_BUILTIN_HADDPD256,
21846 IX86_BUILTIN_HADDPS256,
21847 IX86_BUILTIN_HSUBPD256,
21848 IX86_BUILTIN_HSUBPS256,
21849 IX86_BUILTIN_MAXPD256,
21850 IX86_BUILTIN_MAXPS256,
21851 IX86_BUILTIN_MINPD256,
21852 IX86_BUILTIN_MINPS256,
21853 IX86_BUILTIN_MULPD256,
21854 IX86_BUILTIN_MULPS256,
21855 IX86_BUILTIN_ORPD256,
21856 IX86_BUILTIN_ORPS256,
21857 IX86_BUILTIN_SHUFPD256,
21858 IX86_BUILTIN_SHUFPS256,
21859 IX86_BUILTIN_SUBPD256,
21860 IX86_BUILTIN_SUBPS256,
21861 IX86_BUILTIN_XORPD256,
21862 IX86_BUILTIN_XORPS256,
21863 IX86_BUILTIN_CMPSD,
21864 IX86_BUILTIN_CMPSS,
21865 IX86_BUILTIN_CMPPD,
21866 IX86_BUILTIN_CMPPS,
21867 IX86_BUILTIN_CMPPD256,
21868 IX86_BUILTIN_CMPPS256,
21869 IX86_BUILTIN_CVTDQ2PD256,
21870 IX86_BUILTIN_CVTDQ2PS256,
21871 IX86_BUILTIN_CVTPD2PS256,
21872 IX86_BUILTIN_CVTPS2DQ256,
21873 IX86_BUILTIN_CVTPS2PD256,
21874 IX86_BUILTIN_CVTTPD2DQ256,
21875 IX86_BUILTIN_CVTPD2DQ256,
21876 IX86_BUILTIN_CVTTPS2DQ256,
21877 IX86_BUILTIN_EXTRACTF128PD256,
21878 IX86_BUILTIN_EXTRACTF128PS256,
21879 IX86_BUILTIN_EXTRACTF128SI256,
21880 IX86_BUILTIN_VZEROALL,
21881 IX86_BUILTIN_VZEROUPPER,
21882 IX86_BUILTIN_VPERMILVARPD,
21883 IX86_BUILTIN_VPERMILVARPS,
21884 IX86_BUILTIN_VPERMILVARPD256,
21885 IX86_BUILTIN_VPERMILVARPS256,
21886 IX86_BUILTIN_VPERMILPD,
21887 IX86_BUILTIN_VPERMILPS,
21888 IX86_BUILTIN_VPERMILPD256,
21889 IX86_BUILTIN_VPERMILPS256,
21890 IX86_BUILTIN_VPERMIL2PD,
21891 IX86_BUILTIN_VPERMIL2PS,
21892 IX86_BUILTIN_VPERMIL2PD256,
21893 IX86_BUILTIN_VPERMIL2PS256,
21894 IX86_BUILTIN_VPERM2F128PD256,
21895 IX86_BUILTIN_VPERM2F128PS256,
21896 IX86_BUILTIN_VPERM2F128SI256,
21897 IX86_BUILTIN_VBROADCASTSS,
21898 IX86_BUILTIN_VBROADCASTSD256,
21899 IX86_BUILTIN_VBROADCASTSS256,
21900 IX86_BUILTIN_VBROADCASTPD256,
21901 IX86_BUILTIN_VBROADCASTPS256,
21902 IX86_BUILTIN_VINSERTF128PD256,
21903 IX86_BUILTIN_VINSERTF128PS256,
21904 IX86_BUILTIN_VINSERTF128SI256,
21905 IX86_BUILTIN_LOADUPD256,
21906 IX86_BUILTIN_LOADUPS256,
21907 IX86_BUILTIN_STOREUPD256,
21908 IX86_BUILTIN_STOREUPS256,
21909 IX86_BUILTIN_LDDQU256,
21910 IX86_BUILTIN_MOVNTDQ256,
21911 IX86_BUILTIN_MOVNTPD256,
21912 IX86_BUILTIN_MOVNTPS256,
21913 IX86_BUILTIN_LOADDQU256,
21914 IX86_BUILTIN_STOREDQU256,
21915 IX86_BUILTIN_MASKLOADPD,
21916 IX86_BUILTIN_MASKLOADPS,
21917 IX86_BUILTIN_MASKSTOREPD,
21918 IX86_BUILTIN_MASKSTOREPS,
21919 IX86_BUILTIN_MASKLOADPD256,
21920 IX86_BUILTIN_MASKLOADPS256,
21921 IX86_BUILTIN_MASKSTOREPD256,
21922 IX86_BUILTIN_MASKSTOREPS256,
21923 IX86_BUILTIN_MOVSHDUP256,
21924 IX86_BUILTIN_MOVSLDUP256,
21925 IX86_BUILTIN_MOVDDUP256,
21927 IX86_BUILTIN_SQRTPD256,
21928 IX86_BUILTIN_SQRTPS256,
21929 IX86_BUILTIN_SQRTPS_NR256,
21930 IX86_BUILTIN_RSQRTPS256,
21931 IX86_BUILTIN_RSQRTPS_NR256,
21933 IX86_BUILTIN_RCPPS256,
21935 IX86_BUILTIN_ROUNDPD256,
21936 IX86_BUILTIN_ROUNDPS256,
21938 IX86_BUILTIN_UNPCKHPD256,
21939 IX86_BUILTIN_UNPCKLPD256,
21940 IX86_BUILTIN_UNPCKHPS256,
21941 IX86_BUILTIN_UNPCKLPS256,
21943 IX86_BUILTIN_SI256_SI,
21944 IX86_BUILTIN_PS256_PS,
21945 IX86_BUILTIN_PD256_PD,
21946 IX86_BUILTIN_SI_SI256,
21947 IX86_BUILTIN_PS_PS256,
21948 IX86_BUILTIN_PD_PD256,
21950 IX86_BUILTIN_VTESTZPD,
21951 IX86_BUILTIN_VTESTCPD,
21952 IX86_BUILTIN_VTESTNZCPD,
21953 IX86_BUILTIN_VTESTZPS,
21954 IX86_BUILTIN_VTESTCPS,
21955 IX86_BUILTIN_VTESTNZCPS,
21956 IX86_BUILTIN_VTESTZPD256,
21957 IX86_BUILTIN_VTESTCPD256,
21958 IX86_BUILTIN_VTESTNZCPD256,
21959 IX86_BUILTIN_VTESTZPS256,
21960 IX86_BUILTIN_VTESTCPS256,
21961 IX86_BUILTIN_VTESTNZCPS256,
21962 IX86_BUILTIN_PTESTZ256,
21963 IX86_BUILTIN_PTESTC256,
21964 IX86_BUILTIN_PTESTNZC256,
21966 IX86_BUILTIN_MOVMSKPD256,
21967 IX86_BUILTIN_MOVMSKPS256,
21969 /* TFmode support builtins. */
21971 IX86_BUILTIN_HUGE_VALQ,
21972 IX86_BUILTIN_FABSQ,
21973 IX86_BUILTIN_COPYSIGNQ,
21975 /* Vectorizer support builtins. */
21976 IX86_BUILTIN_CPYSGNPS,
21977 IX86_BUILTIN_CPYSGNPD,
21979 IX86_BUILTIN_CVTUDQ2PS,
21981 IX86_BUILTIN_VEC_PERM_V2DF,
21982 IX86_BUILTIN_VEC_PERM_V4SF,
21983 IX86_BUILTIN_VEC_PERM_V2DI,
21984 IX86_BUILTIN_VEC_PERM_V4SI,
21985 IX86_BUILTIN_VEC_PERM_V8HI,
21986 IX86_BUILTIN_VEC_PERM_V16QI,
21987 IX86_BUILTIN_VEC_PERM_V2DI_U,
21988 IX86_BUILTIN_VEC_PERM_V4SI_U,
21989 IX86_BUILTIN_VEC_PERM_V8HI_U,
21990 IX86_BUILTIN_VEC_PERM_V16QI_U,
21991 IX86_BUILTIN_VEC_PERM_V4DF,
21992 IX86_BUILTIN_VEC_PERM_V8SF,
21994 /* FMA4 and XOP instructions. */
21995 IX86_BUILTIN_VFMADDSS,
21996 IX86_BUILTIN_VFMADDSD,
21997 IX86_BUILTIN_VFMADDPS,
21998 IX86_BUILTIN_VFMADDPD,
21999 IX86_BUILTIN_VFMSUBSS,
22000 IX86_BUILTIN_VFMSUBSD,
22001 IX86_BUILTIN_VFMSUBPS,
22002 IX86_BUILTIN_VFMSUBPD,
22003 IX86_BUILTIN_VFMADDSUBPS,
22004 IX86_BUILTIN_VFMADDSUBPD,
22005 IX86_BUILTIN_VFMSUBADDPS,
22006 IX86_BUILTIN_VFMSUBADDPD,
22007 IX86_BUILTIN_VFNMADDSS,
22008 IX86_BUILTIN_VFNMADDSD,
22009 IX86_BUILTIN_VFNMADDPS,
22010 IX86_BUILTIN_VFNMADDPD,
22011 IX86_BUILTIN_VFNMSUBSS,
22012 IX86_BUILTIN_VFNMSUBSD,
22013 IX86_BUILTIN_VFNMSUBPS,
22014 IX86_BUILTIN_VFNMSUBPD,
22015 IX86_BUILTIN_VFMADDPS256,
22016 IX86_BUILTIN_VFMADDPD256,
22017 IX86_BUILTIN_VFMSUBPS256,
22018 IX86_BUILTIN_VFMSUBPD256,
22019 IX86_BUILTIN_VFMADDSUBPS256,
22020 IX86_BUILTIN_VFMADDSUBPD256,
22021 IX86_BUILTIN_VFMSUBADDPS256,
22022 IX86_BUILTIN_VFMSUBADDPD256,
22023 IX86_BUILTIN_VFNMADDPS256,
22024 IX86_BUILTIN_VFNMADDPD256,
22025 IX86_BUILTIN_VFNMSUBPS256,
22026 IX86_BUILTIN_VFNMSUBPD256,
22028 IX86_BUILTIN_VPCMOV,
22029 IX86_BUILTIN_VPCMOV_V2DI,
22030 IX86_BUILTIN_VPCMOV_V4SI,
22031 IX86_BUILTIN_VPCMOV_V8HI,
22032 IX86_BUILTIN_VPCMOV_V16QI,
22033 IX86_BUILTIN_VPCMOV_V4SF,
22034 IX86_BUILTIN_VPCMOV_V2DF,
22035 IX86_BUILTIN_VPCMOV256,
22036 IX86_BUILTIN_VPCMOV_V4DI256,
22037 IX86_BUILTIN_VPCMOV_V8SI256,
22038 IX86_BUILTIN_VPCMOV_V16HI256,
22039 IX86_BUILTIN_VPCMOV_V32QI256,
22040 IX86_BUILTIN_VPCMOV_V8SF256,
22041 IX86_BUILTIN_VPCMOV_V4DF256,
22043 IX86_BUILTIN_VPPERM,
22045 IX86_BUILTIN_VPMACSSWW,
22046 IX86_BUILTIN_VPMACSWW,
22047 IX86_BUILTIN_VPMACSSWD,
22048 IX86_BUILTIN_VPMACSWD,
22049 IX86_BUILTIN_VPMACSSDD,
22050 IX86_BUILTIN_VPMACSDD,
22051 IX86_BUILTIN_VPMACSSDQL,
22052 IX86_BUILTIN_VPMACSSDQH,
22053 IX86_BUILTIN_VPMACSDQL,
22054 IX86_BUILTIN_VPMACSDQH,
22055 IX86_BUILTIN_VPMADCSSWD,
22056 IX86_BUILTIN_VPMADCSWD,
22058 IX86_BUILTIN_VPHADDBW,
22059 IX86_BUILTIN_VPHADDBD,
22060 IX86_BUILTIN_VPHADDBQ,
22061 IX86_BUILTIN_VPHADDWD,
22062 IX86_BUILTIN_VPHADDWQ,
22063 IX86_BUILTIN_VPHADDDQ,
22064 IX86_BUILTIN_VPHADDUBW,
22065 IX86_BUILTIN_VPHADDUBD,
22066 IX86_BUILTIN_VPHADDUBQ,
22067 IX86_BUILTIN_VPHADDUWD,
22068 IX86_BUILTIN_VPHADDUWQ,
22069 IX86_BUILTIN_VPHADDUDQ,
22070 IX86_BUILTIN_VPHSUBBW,
22071 IX86_BUILTIN_VPHSUBWD,
22072 IX86_BUILTIN_VPHSUBDQ,
22074 IX86_BUILTIN_VPROTB,
22075 IX86_BUILTIN_VPROTW,
22076 IX86_BUILTIN_VPROTD,
22077 IX86_BUILTIN_VPROTQ,
22078 IX86_BUILTIN_VPROTB_IMM,
22079 IX86_BUILTIN_VPROTW_IMM,
22080 IX86_BUILTIN_VPROTD_IMM,
22081 IX86_BUILTIN_VPROTQ_IMM,
22083 IX86_BUILTIN_VPSHLB,
22084 IX86_BUILTIN_VPSHLW,
22085 IX86_BUILTIN_VPSHLD,
22086 IX86_BUILTIN_VPSHLQ,
22087 IX86_BUILTIN_VPSHAB,
22088 IX86_BUILTIN_VPSHAW,
22089 IX86_BUILTIN_VPSHAD,
22090 IX86_BUILTIN_VPSHAQ,
22092 IX86_BUILTIN_VFRCZSS,
22093 IX86_BUILTIN_VFRCZSD,
22094 IX86_BUILTIN_VFRCZPS,
22095 IX86_BUILTIN_VFRCZPD,
22096 IX86_BUILTIN_VFRCZPS256,
22097 IX86_BUILTIN_VFRCZPD256,
22099 IX86_BUILTIN_VPCOMEQUB,
22100 IX86_BUILTIN_VPCOMNEUB,
22101 IX86_BUILTIN_VPCOMLTUB,
22102 IX86_BUILTIN_VPCOMLEUB,
22103 IX86_BUILTIN_VPCOMGTUB,
22104 IX86_BUILTIN_VPCOMGEUB,
22105 IX86_BUILTIN_VPCOMFALSEUB,
22106 IX86_BUILTIN_VPCOMTRUEUB,
22108 IX86_BUILTIN_VPCOMEQUW,
22109 IX86_BUILTIN_VPCOMNEUW,
22110 IX86_BUILTIN_VPCOMLTUW,
22111 IX86_BUILTIN_VPCOMLEUW,
22112 IX86_BUILTIN_VPCOMGTUW,
22113 IX86_BUILTIN_VPCOMGEUW,
22114 IX86_BUILTIN_VPCOMFALSEUW,
22115 IX86_BUILTIN_VPCOMTRUEUW,
22117 IX86_BUILTIN_VPCOMEQUD,
22118 IX86_BUILTIN_VPCOMNEUD,
22119 IX86_BUILTIN_VPCOMLTUD,
22120 IX86_BUILTIN_VPCOMLEUD,
22121 IX86_BUILTIN_VPCOMGTUD,
22122 IX86_BUILTIN_VPCOMGEUD,
22123 IX86_BUILTIN_VPCOMFALSEUD,
22124 IX86_BUILTIN_VPCOMTRUEUD,
22126 IX86_BUILTIN_VPCOMEQUQ,
22127 IX86_BUILTIN_VPCOMNEUQ,
22128 IX86_BUILTIN_VPCOMLTUQ,
22129 IX86_BUILTIN_VPCOMLEUQ,
22130 IX86_BUILTIN_VPCOMGTUQ,
22131 IX86_BUILTIN_VPCOMGEUQ,
22132 IX86_BUILTIN_VPCOMFALSEUQ,
22133 IX86_BUILTIN_VPCOMTRUEUQ,
22135 IX86_BUILTIN_VPCOMEQB,
22136 IX86_BUILTIN_VPCOMNEB,
22137 IX86_BUILTIN_VPCOMLTB,
22138 IX86_BUILTIN_VPCOMLEB,
22139 IX86_BUILTIN_VPCOMGTB,
22140 IX86_BUILTIN_VPCOMGEB,
22141 IX86_BUILTIN_VPCOMFALSEB,
22142 IX86_BUILTIN_VPCOMTRUEB,
22144 IX86_BUILTIN_VPCOMEQW,
22145 IX86_BUILTIN_VPCOMNEW,
22146 IX86_BUILTIN_VPCOMLTW,
22147 IX86_BUILTIN_VPCOMLEW,
22148 IX86_BUILTIN_VPCOMGTW,
22149 IX86_BUILTIN_VPCOMGEW,
22150 IX86_BUILTIN_VPCOMFALSEW,
22151 IX86_BUILTIN_VPCOMTRUEW,
22153 IX86_BUILTIN_VPCOMEQD,
22154 IX86_BUILTIN_VPCOMNED,
22155 IX86_BUILTIN_VPCOMLTD,
22156 IX86_BUILTIN_VPCOMLED,
22157 IX86_BUILTIN_VPCOMGTD,
22158 IX86_BUILTIN_VPCOMGED,
22159 IX86_BUILTIN_VPCOMFALSED,
22160 IX86_BUILTIN_VPCOMTRUED,
22162 IX86_BUILTIN_VPCOMEQQ,
22163 IX86_BUILTIN_VPCOMNEQ,
22164 IX86_BUILTIN_VPCOMLTQ,
22165 IX86_BUILTIN_VPCOMLEQ,
22166 IX86_BUILTIN_VPCOMGTQ,
22167 IX86_BUILTIN_VPCOMGEQ,
22168 IX86_BUILTIN_VPCOMFALSEQ,
22169 IX86_BUILTIN_VPCOMTRUEQ,
22171 /* LWP instructions. */
22172 IX86_BUILTIN_LLWPCB,
22173 IX86_BUILTIN_SLWPCB,
22174 IX86_BUILTIN_LWPVAL32,
22175 IX86_BUILTIN_LWPVAL64,
22176 IX86_BUILTIN_LWPINS32,
22177 IX86_BUILTIN_LWPINS64,
22179 IX86_BUILTIN_CLZS,
22181 /* FSGSBASE instructions. */
22182 IX86_BUILTIN_RDFSBASE32,
22183 IX86_BUILTIN_RDFSBASE64,
22184 IX86_BUILTIN_RDGSBASE32,
22185 IX86_BUILTIN_RDGSBASE64,
22186 IX86_BUILTIN_WRFSBASE32,
22187 IX86_BUILTIN_WRFSBASE64,
22188 IX86_BUILTIN_WRGSBASE32,
22189 IX86_BUILTIN_WRGSBASE64,
22191 /* RDRND instructions. */
22192 IX86_BUILTIN_RDRAND16,
22193 IX86_BUILTIN_RDRAND32,
22194 IX86_BUILTIN_RDRAND64,
22196 /* F16C instructions. */
22197 IX86_BUILTIN_CVTPH2PS,
22198 IX86_BUILTIN_CVTPH2PS256,
22199 IX86_BUILTIN_CVTPS2PH,
22200 IX86_BUILTIN_CVTPS2PH256,
22202 IX86_BUILTIN_MAX
22203 };
22205 /* Table for the ix86 builtin decls. */
22206 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22208 /* Table of all of the builtin functions that are possible with different ISAs
22209 but are waiting to be built until a function is declared to use that
22210 ISA. */
22211 struct builtin_isa {
22212 const char *name; /* function name */
22213 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22214 int isa; /* isa_flags this builtin is defined for */
22215 bool const_p; /* true if the declaration is constant */
22216 bool set_and_not_built_p; /* true while the builtin is recorded here but its decl has not been built yet */
22217 };
22219 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
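/* Illustrative sketch (editorial note, not original source): the
   deferred-builtin lifecycle for a builtin whose ISA is not enabled
   at startup:

     def_builtin (...)            -> decl NULL, set_and_not_built_p = true
     (ISA later enabled, e.g. via function-specific target options)
     ix86_add_new_builtins (isa)  -> decl built, set_and_not_built_p = false

   This mirrors the logic of def_builtin and ix86_add_new_builtins below.  */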
22222 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
22223 of which isa_flags to use in the ix86_builtins_isa array. Stores the
22224 function decl in the ix86_builtins array. Returns the function decl or
22225 NULL_TREE if the builtin was not added.
22227 If the front end has a special hook for builtin functions, delay adding
22228 builtin functions that aren't in the current ISA until the ISA is changed
22229 with function specific optimization. Doing so can save about 300K for the
22230 default compiler. When the builtin is expanded, check at that time whether
22231 it is valid.
22233 If the front end doesn't have a special hook, record all builtins, even if
22234 the instruction set isn't in the current ISA, in case the user uses
22235 function specific options for a different ISA, so that we don't get scope
22236 errors if a builtin is added in the middle of a function scope. */
22238 static tree
22239 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22240 enum ix86_builtins code)
22241 {
22242 tree decl = NULL_TREE;
22244 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22245 {
22246 ix86_builtins_isa[(int) code].isa = mask;
22248 mask &= ~OPTION_MASK_ISA_64BIT;
22249 if (mask == 0
22250 || (mask & ix86_isa_flags) != 0
22251 || (lang_hooks.builtin_function
22252 == lang_hooks.builtin_function_ext_scope))
22254 {
22255 tree type = ix86_get_builtin_func_type (tcode);
22256 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22257 NULL, NULL_TREE);
22258 ix86_builtins[(int) code] = decl;
22259 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22260 }
22261 else
22262 {
22263 ix86_builtins[(int) code] = NULL_TREE;
22264 ix86_builtins_isa[(int) code].tcode = tcode;
22265 ix86_builtins_isa[(int) code].name = name;
22266 ix86_builtins_isa[(int) code].const_p = false;
22267 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22268 }
22269 }
22271 return decl;
22272 }
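/* Illustrative example (editorial; the real registration calls live in the
   ix86_init_*_builtins routines elsewhere in this file, and the exact pair
   shown here is assumed for illustration):

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
                  VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);

   With -msse in effect the decl is built immediately; otherwise it is only
   recorded in ix86_builtins_isa for ix86_add_new_builtins to finish.  */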
22274 /* Like def_builtin, but also marks the function decl "const". */
22276 static tree
22277 def_builtin_const (int mask, const char *name,
22278 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22279 {
22280 tree decl = def_builtin (mask, name, tcode, code);
22281 if (decl)
22282 TREE_READONLY (decl) = 1;
22283 else
22284 ix86_builtins_isa[(int) code].const_p = true;
22286 return decl;
22287 }
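/* Illustrative example (editorial): builtins with no side effects, such as
   the arithmetic entries in bdesc_args below, go through this wrapper, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps",
                        V4SF_FTYPE_V4SF, IX86_BUILTIN_SQRTPS);

   so that calls to them can be CSEd and moved freely by the optimizers.  */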
22289 /* Add any new builtin functions for a given ISA that may not have been
22290 declared. This saves a bit of space compared to adding all of the
22291 declarations to the tree, even if we didn't use them. */
22293 static void
22294 ix86_add_new_builtins (int isa)
22295 {
22296 int i;
22298 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22299 {
22300 if ((ix86_builtins_isa[i].isa & isa) != 0
22301 && ix86_builtins_isa[i].set_and_not_built_p)
22302 {
22303 tree decl, type;
22305 /* Don't define the builtin again. */
22306 ix86_builtins_isa[i].set_and_not_built_p = false;
22308 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22309 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
22310 type, i, BUILT_IN_MD, NULL,
22311 NULL_TREE);
22313 ix86_builtins[i] = decl;
22314 if (ix86_builtins_isa[i].const_p)
22315 TREE_READONLY (decl) = 1;
22316 }
22317 }
22318 }
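/* Illustrative note (editorial assumption about typical use): this runs when
   the effective ISA grows mid-compilation, e.g. when a function carries
   __attribute__((target("avx"))) in a translation unit built without -mavx;
   only then do the AVX builtins deferred by def_builtin get real decls.  */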
22320 /* Bits for builtin_description.flag. */
22322 /* Set when we don't support the comparison natively, and should
22323 swap_comparison in order to support it. */
22324 #define BUILTIN_DESC_SWAP_OPERANDS 1
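/* Worked example (editorial): the hardware has no "compare greater" variant
   of cmpps, so a greater-than builtin is described as LT with swapped
   operands -- a > b is emitted as b < a.  The __builtin_ia32_cmpgtps entry
   in bdesc_args below (comparison LT plus a *_SWAP function type) is an
   instance of the same trick.  */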
22326 struct builtin_description
22327 {
22328 const unsigned int mask;
22329 const enum insn_code icode;
22330 const char *const name;
22331 const enum ix86_builtins code;
22332 const enum rtx_code comparison;
22333 const int flag;
22334 };
22336 static const struct builtin_description bdesc_comi[] =
22337 {
22338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
22339 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
22340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
22341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
22342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
22343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
22344 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
22345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
22346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
22347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
22348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
22349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
22350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
22351 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
22352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
22353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
22354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
22355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
22356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
22357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
22358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
22359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
22360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
22361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
22364 static const struct builtin_description bdesc_pcmpestr[] =
22367 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
22368 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
22369 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
22370 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
22371 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
22372 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
22373 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
22376 static const struct builtin_description bdesc_pcmpistr[] =
22379 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
22380 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
22381 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
22382 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
22383 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
22384 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
22385 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
22386 };
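/* Reader's note (editorial): the *_FTYPE_* values stored in the flag field
   of the tables below encode the builtin's C signature as
   RET_FTYPE_ARG1_ARG2..., for example:

     UINT64_FTYPE_PUNSIGNED  ~  unsigned long long f (unsigned *)
     VOID_FTYPE_PFLOAT_V4SF  ~  void f (float *, __v4sf)

   The enum ix86_builtin_func_type itself is defined elsewhere in GCC.  */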
22388 /* Special builtins with variable number of arguments. */
22389 static const struct builtin_description bdesc_special_args[] =
22390 {
22391 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
22392 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
22394 /* MMX */
22395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22397 /* 3DNow! */
22398 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22400 /* SSE */
22401 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22408 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22410 /* SSE or 3DNow!A */
22411 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22412 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
22414 /* SSE2 */
22415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
22419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
22421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
22422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
22423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22428 /* SSE3 */
22429 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22431 /* SSE4.1 */
22432 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
22434 /* SSE4A */
22435 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22436 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22438 /* AVX */
22439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
22440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
22442 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22443 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22444 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
22446 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
22448 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22449 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22450 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22451 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22452 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22453 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
22454 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22456 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
22457 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22458 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22460 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
22461 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
22462 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
22463 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
22464 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
22465 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
22466 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
22467 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
22469 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
22470 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
22471 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
22472 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
22473 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
22474 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
22476 /* FSGSBASE */
22477 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22478 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22479 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22480 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22481 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22482 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22483 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22484 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22486 /* RDRND */
22487 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
22488 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22489 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22490 };
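/* Reader's note (editorial): a mask of ~OPTION_MASK_ISA_64BIT, as in the
   rdtsc and bsrsi entries, has every ISA bit set except 64BIT, so such a
   builtin counts as available under any ISA, in both 32-bit and 64-bit
   modes; a mask containing OPTION_MASK_ISA_64BIT instead restricts the
   entry to TARGET_64BIT, matching the test at the top of def_builtin.  */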
22492 /* Builtins with variable number of arguments. */
22493 static const struct builtin_description bdesc_args[] =
22494 {
22495 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22496 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22497 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22498 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22499 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22500 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22501 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22503 /* MMX */
22504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22505 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22508 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22512 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22523 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22525 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22530 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22531 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22533 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22536 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22539 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22540 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22542 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22543 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22544 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22546 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22548 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22549 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22550 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22551 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22552 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22553 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22555 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22556 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22557 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22558 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22559 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22560 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22562 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22563 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22564 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22565 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
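/* Reader's note (editorial): the _COUNT suffixes above distinguish the two
   MMX shift forms -- *_SI_COUNT entries (psllwi, psrldi, psrawi, ...) take
   the shift count as a plain int, while the *_Vxxx_COUNT entries (psllw,
   psrlq, psrad, ...) take the count in an MMX register, matching the two
   hardware encodings of the shifts.  */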
22567 /* 3DNow! */
22568 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22569 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22570 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22571 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22573 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22574 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22575 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22576 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22577 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22578 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22579 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22580 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22581 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22582 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22583 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22584 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22585 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22586 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22587 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22589 /* 3DNow!A */
22590 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22591 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22592 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22593 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22594 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22595 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22597 /* SSE */
22598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22599 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22600 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22602 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22606 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22609 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22613 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22614 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22615 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22622 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22623 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22624 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22627 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22628 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22630 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22631 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22632 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22633 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22634 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22638 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22639 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22640 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22643 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22645 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22646 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22650 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22651 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22652 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22653 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22655 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22657 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22658 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22659 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22660 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22661 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22663 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22664 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22665 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
22667 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22669 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22670 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22671 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22673 /* SSE MMX or 3DNow!A */
22674 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22675 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22676 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22678 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22679 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22680 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22681 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22683 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22684 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22686 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22688 /* SSE2 */
22689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22691 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22692 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22693 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22694 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22695 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22696 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22697 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22698 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22699 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22700 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22701 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22702 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
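/* Reader's note (editorial): CODE_FOR_nothing in the vec_perm entries above
   means no single insn pattern implements them; they are presumably expanded
   by dedicated code elsewhere in this file rather than through the generic
   table-driven expander, which is also why they carry full three-operand
   function types.  */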
22704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22705 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22721 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22722 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
22726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22728 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22729 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22730 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22731 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22735 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22737 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22740 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22741 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
22742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22743 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22744 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22745 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22746 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22747 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22749 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22750 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22751 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22752 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22753 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22754 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22755 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22756 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22758 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22759 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22760 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22761 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
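
  /* A note on the *_INT_CONVERT type tags used by pslldqi128,
     psrldqi128 and palignr128 above (an interpretation of the type
     names; the expander later in this file is authoritative): the insn
     patterns operate in a wider mode (V1TI or DI) than the builtin's
     declared vector type, so the expander converts the vector
     operands, and the immediate is a shift expressed in bits rather
     than bytes; the intrinsic wrappers multiply a byte count by 8.  */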

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
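
  /* For the ptest entries just above, the comparison field (EQ, LTU,
     GTU) is not part of the instruction; it appears to select which
     flag of the PTEST result the builtin returns: EQ for the ZF-based
     ptestz, LTU for the CF-based ptestc, and GTU for ptestnzc.  */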

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
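
  /* The AES and PCLMUL entries above carry a null name.  A plausible
     reading, consistent with how these tables are normally consumed in
     this file, is that the generic registration loop skips nameless
     entries and the user-visible builtins are instead defined
     explicitly under OPTION_MASK_ISA_AES / OPTION_MASK_ISA_PCLMUL,
     while the rows here still map IX86_BUILTIN_* codes to patterns.  */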

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
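
/* End of bdesc_args.  Each row pairs an ISA mask and insn code with a
   builtin name, an IX86_BUILTIN_* code, an optional comparison code and
   a function-type tag; a registration loop elsewhere in this file
   (ix86_init_mmx_sse_builtins) is expected to walk this table and
   define each named entry under its ISA mask.  */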

/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
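
/* Sketch of how to read these tags, assuming the V*_FTYPE_* naming
   used throughout this file: the part before _FTYPE_ is the return
   vector mode and the parts after are the argument modes, so an entry
   tagged MULTI_ARG_3_SF (V4SF_FTYPE_V4SF_V4SF_V4SF) describes a
   builtin of type V4SF (*) (V4SF, V4SF, V4SF), and MULTI_ARG_2_DI_IMM
   (V2DI_FTYPE_V2DI_SI) one of type V2DI (*) (V2DI, int).  */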

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23339 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23348 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23353 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23354 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23355 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23356 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23357 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23358 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23359 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23360 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23362 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23363 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23364 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23365 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23366 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23367 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23368 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23369 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23371 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23372 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23373 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23374 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
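/* Editorial sketch (not part of the table): each entry above binds a
   user-visible builtin to an insn pattern plus an rtx comparison code.
   With -mxop enabled, one of the byte-compare builtins could be used
   from C like this (the vector typedef is spelled out as an assumption
   of the example):

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     __v16qi
     bytes_equal (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_vpcomeqb (a, b);  // per-element a == b mask
     }

   The MULTI_ARG_2_QI_CMP classification is what routes the EQ code
   through ix86_expand_multi_arg_builtin to xop_maskcmpv16qi3.  */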
23378 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
23379 in the current target ISA to allow the user to compile particular modules
23380 with different target specific options that differ from the command line
23381 options.  */
23383 ix86_init_mmx_sse_builtins (void)
23385 const struct builtin_description * d;
23386 enum ix86_builtin_func_type ftype;
23389 /* Add all special builtins with a variable number of operands.  */
23390 for (i = 0, d = bdesc_special_args;
23391 i < ARRAY_SIZE (bdesc_special_args);
23397 ftype = (enum ix86_builtin_func_type) d->flag;
23398 def_builtin (d->mask, d->name, ftype, d->code);
23401 /* Add all builtins with a variable number of operands.  */
23402 for (i = 0, d = bdesc_args;
23403 i < ARRAY_SIZE (bdesc_args);
23409 ftype = (enum ix86_builtin_func_type) d->flag;
23410 def_builtin_const (d->mask, d->name, ftype, d->code);
23413 /* pcmpestr[im] insns. */
23414 for (i = 0, d = bdesc_pcmpestr;
23415 i < ARRAY_SIZE (bdesc_pcmpestr);
23418 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23419 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23421 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23422 def_builtin_const (d->mask, d->name, ftype, d->code);
23425 /* pcmpistr[im] insns. */
23426 for (i = 0, d = bdesc_pcmpistr;
23427 i < ARRAY_SIZE (bdesc_pcmpistr);
23430 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23431 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23433 ftype = INT_FTYPE_V16QI_V16QI_INT;
23434 def_builtin_const (d->mask, d->name, ftype, d->code);
23437 /* comi/ucomi insns. */
23438 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23440 if (d->mask == OPTION_MASK_ISA_SSE2)
23441 ftype = INT_FTYPE_V2DF_V2DF;
23443 ftype = INT_FTYPE_V4SF_V4SF;
23444 def_builtin_const (d->mask, d->name, ftype, d->code);
23448 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23449 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23450 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23451 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
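/* Editorial sketch of the two MXCSR builtins just registered; the bit
   mask below is written out by hand for the example (0x8040 is assumed
   to be FTZ | DAZ) rather than taken from a header:

     unsigned int saved = __builtin_ia32_stmxcsr ();  // UNSIGNED_FTYPE_VOID
     __builtin_ia32_ldmxcsr (saved | 0x8040);         // VOID_FTYPE_UNSIGNED
     ...
     __builtin_ia32_ldmxcsr (saved);                  // restore on exit
*/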
23453 /* SSE or 3DNow!A */
23454 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23455 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23456 IX86_BUILTIN_MASKMOVQ);
23459 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23460 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23462 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23463 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23464 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23465 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23468 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23469 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23470 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23471 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
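/* Editorial sketch of how the two SSE3 builtins above pair up in user
   code (assumes `addr' points at a location another thread will write):

     static void
     wait_for_store (const void *addr)
     {
       __builtin_ia32_monitor (addr, 0, 0);  // arm the address monitor
       __builtin_ia32_mwait (0, 0);          // idle until it is written
     }

   matching VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED and
   VOID_FTYPE_UNSIGNED_UNSIGNED.  */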
23474 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23475 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23476 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23477 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23478 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23479 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23480 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23481 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23482 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23483 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23484 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23485 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
23488 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23489 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
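/* Editorial sketch of the carry-less multiply builtin just registered;
   the 0x00 selector picks the low quadword of each operand, and the
   expander later in this file insists the last argument be an immediate:

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2di
     clmul_low (__v2di a, __v2di b)
     {
       return __builtin_ia32_pclmulqdq128 (a, b, 0x00);
     }
*/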
23491 /* MMX access to the vec_init patterns. */
23492 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23493 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23495 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23496 V4HI_FTYPE_HI_HI_HI_HI,
23497 IX86_BUILTIN_VEC_INIT_V4HI);
23499 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23500 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23501 IX86_BUILTIN_VEC_INIT_V8QI);
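/* Editorial sketch of a vec_init builtin from the set above, as the
   mmintrin.h-style wrappers would use it (requires MMX; the typedef is
   an assumption of the example):

     typedef int __v2si __attribute__ ((__vector_size__ (8)));

     __v2si
     pack_two (int e0, int e1)
     {
       return __builtin_ia32_vec_init_v2si (e0, e1);  // V2SI_FTYPE_INT_INT
     }
*/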
23503 /* Access to the vec_extract patterns. */
23504 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23505 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23506 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23507 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23508 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23509 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23510 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23511 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23512 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23513 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23515 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23516 "__builtin_ia32_vec_ext_v4hi",
23517 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23519 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23520 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23522 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23523 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
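/* Editorial sketch of a vec_extract builtin from the set above; the
   selector must be an integer constant in range, which
   get_element_number later in this file enforces:

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     float
     first_lane (__v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);  // FLOAT_FTYPE_V4SF_INT
     }
*/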
23525 /* Access to the vec_set patterns. */
23526 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23527 "__builtin_ia32_vec_set_v2di",
23528 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23530 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23531 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23533 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23534 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23536 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23537 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23539 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23540 "__builtin_ia32_vec_set_v4hi",
23541 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23543 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23544 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
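/* Editorial sketch of a vec_set builtin from the set above (the V4SI
   form requires SSE4.1).  Note that the builtin returns the updated
   vector and leaves its input operand unmodified, as
   ix86_expand_vec_set_builtin later in this file makes explicit:

     typedef int __v4si __attribute__ ((__vector_size__ (16)));

     __v4si
     replace_lane0 (__v4si v, int x)
     {
       return __builtin_ia32_vec_set_v4si (v, x, 0);  // V4SI_FTYPE_V4SI_SI_INT
     }
*/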
23546 /* Add FMA4 and XOP multi-argument instructions.  */
23547 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23552 ftype = (enum ix86_builtin_func_type) d->flag;
23553 def_builtin_const (d->mask, d->name, ftype, d->code);
23557 /* Internal method for ix86_init_builtins. */
23560 ix86_init_builtins_va_builtins_abi (void)
23562 tree ms_va_ref, sysv_va_ref;
23563 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23564 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23565 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23566 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23570 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23571 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23572 ms_va_ref = build_reference_type (ms_va_list_type_node);
23574 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23577 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23578 fnvoid_va_start_ms =
23579 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23580 fnvoid_va_end_sysv =
23581 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23582 fnvoid_va_start_sysv =
23583 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23585 fnvoid_va_copy_ms =
23586 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23588 fnvoid_va_copy_sysv =
23589 build_function_type_list (void_type_node, sysv_va_ref,
23590 sysv_va_ref, NULL_TREE);
23592 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23593 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23594 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23595 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23596 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23597 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23598 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23599 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23600 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23601 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23602 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23603 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
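/* Editorial sketch of the ABI-specific varargs builtins registered
   above.  The __builtin_ms_va_list spelling is assumed here to be the
   name the front end exposes for ms_va_list_type_node:

     int __attribute__ ((ms_abi))
     sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;

       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/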
23607 ix86_init_builtin_types (void)
23609 tree float128_type_node, float80_type_node;
23611 /* The __float80 type. */
23612 float80_type_node = long_double_type_node;
23613 if (TYPE_MODE (float80_type_node) != XFmode)
23615 /* The __float80 type. */
23616 float80_type_node = make_node (REAL_TYPE);
23618 TYPE_PRECISION (float80_type_node) = 80;
23619 layout_type (float80_type_node);
23621 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
23623 /* The __float128 type. */
23624 float128_type_node = make_node (REAL_TYPE);
23625 TYPE_PRECISION (float128_type_node) = 128;
23626 layout_type (float128_type_node);
23627 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23629 /* This macro is built by i386-builtin-types.awk. */
23630 DEFINE_BUILTIN_PRIMITIVE_TYPES;
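/* Editorial sketch: the two types registered above are then directly
   usable from C.  The `w' and `q' constant suffixes are the i386
   extensions for these types:

     __float80  x = 1.0w;  // XFmode
     __float128 y = 1.0q;  // TFmode
*/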
23634 ix86_init_builtins (void)
23638 ix86_init_builtin_types ();
23640 /* TFmode support builtins. */
23641 def_builtin_const (0, "__builtin_infq",
23642 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23643 def_builtin_const (0, "__builtin_huge_valq",
23644 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23646 /* We will expand them into normal calls if SSE2 isn't available,
23647 since they are used by libgcc.  */
23648 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23649 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23650 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23651 TREE_READONLY (t) = 1;
23652 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23654 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23655 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23656 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23657 TREE_READONLY (t) = 1;
23658 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
23660 ix86_init_mmx_sse_builtins ();
23663 ix86_init_builtins_va_builtins_abi ();
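/* Editorial sketch of the TFmode builtins wired up above; when SSE2 is
   unavailable they become calls to __fabstf2 and __copysigntf3 in
   libgcc, per the comment earlier in this function:

     __float128
     apply_sign (__float128 v, __float128 sign)
     {
       return __builtin_copysignq (__builtin_fabsq (v), sign);
     }
*/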
23666 /* Return the ix86 builtin for CODE. */
23669 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23671 if (code >= IX86_BUILTIN_MAX)
23672 return error_mark_node;
23674 return ix86_builtins[code];
23677 /* Errors in the source file can cause expand_expr to return const0_rtx
23678 where we expect a vector. To avoid crashing, use one of the vector
23679 clear instructions. */
23681 safe_vector_operand (rtx x, enum machine_mode mode)
23683 if (x == const0_rtx)
23684 x = CONST0_RTX (mode);
23688 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23691 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23694 tree arg0 = CALL_EXPR_ARG (exp, 0);
23695 tree arg1 = CALL_EXPR_ARG (exp, 1);
23696 rtx op0 = expand_normal (arg0);
23697 rtx op1 = expand_normal (arg1);
23698 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23699 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23700 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23702 if (VECTOR_MODE_P (mode0))
23703 op0 = safe_vector_operand (op0, mode0);
23704 if (VECTOR_MODE_P (mode1))
23705 op1 = safe_vector_operand (op1, mode1);
23707 if (optimize || !target
23708 || GET_MODE (target) != tmode
23709 || !insn_data[icode].operand[0].predicate (target, tmode))
23710 target = gen_reg_rtx (tmode);
23712 if (GET_MODE (op1) == SImode && mode1 == TImode)
23714 rtx x = gen_reg_rtx (V4SImode);
23715 emit_insn (gen_sse2_loadd (x, op1));
23716 op1 = gen_lowpart (TImode, x);
23719 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23720 op0 = copy_to_mode_reg (mode0, op0);
23721 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23722 op1 = copy_to_mode_reg (mode1, op1);
23724 pat = GEN_FCN (icode) (target, op0, op1);
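/* Editorial sketch: two-operand vector builtins classified as
   V2DI_FTYPE_V2DI_V2DI, such as __builtin_ia32_aesenc128 registered
   earlier, are expanded through this path when their comparison code is
   UNKNOWN (see ix86_expand_args_builtin below):

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __v2di
     one_aes_round (__v2di state, __v2di round_key)
     {
       return __builtin_ia32_aesenc128 (state, round_key);
     }
*/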
23733 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23736 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23737 enum ix86_builtin_func_type m_type,
23738 enum rtx_code sub_code)
23743 bool comparison_p = false;
23745 bool last_arg_constant = false;
23746 int num_memory = 0;
23749 enum machine_mode mode;
23752 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23756 case MULTI_ARG_4_DF2_DI_I:
23757 case MULTI_ARG_4_DF2_DI_I1:
23758 case MULTI_ARG_4_SF2_SI_I:
23759 case MULTI_ARG_4_SF2_SI_I1:
23761 last_arg_constant = true;
23764 case MULTI_ARG_3_SF:
23765 case MULTI_ARG_3_DF:
23766 case MULTI_ARG_3_SF2:
23767 case MULTI_ARG_3_DF2:
23768 case MULTI_ARG_3_DI:
23769 case MULTI_ARG_3_SI:
23770 case MULTI_ARG_3_SI_DI:
23771 case MULTI_ARG_3_HI:
23772 case MULTI_ARG_3_HI_SI:
23773 case MULTI_ARG_3_QI:
23774 case MULTI_ARG_3_DI2:
23775 case MULTI_ARG_3_SI2:
23776 case MULTI_ARG_3_HI2:
23777 case MULTI_ARG_3_QI2:
23781 case MULTI_ARG_2_SF:
23782 case MULTI_ARG_2_DF:
23783 case MULTI_ARG_2_DI:
23784 case MULTI_ARG_2_SI:
23785 case MULTI_ARG_2_HI:
23786 case MULTI_ARG_2_QI:
23790 case MULTI_ARG_2_DI_IMM:
23791 case MULTI_ARG_2_SI_IMM:
23792 case MULTI_ARG_2_HI_IMM:
23793 case MULTI_ARG_2_QI_IMM:
23795 last_arg_constant = true;
23798 case MULTI_ARG_1_SF:
23799 case MULTI_ARG_1_DF:
23800 case MULTI_ARG_1_SF2:
23801 case MULTI_ARG_1_DF2:
23802 case MULTI_ARG_1_DI:
23803 case MULTI_ARG_1_SI:
23804 case MULTI_ARG_1_HI:
23805 case MULTI_ARG_1_QI:
23806 case MULTI_ARG_1_SI_DI:
23807 case MULTI_ARG_1_HI_DI:
23808 case MULTI_ARG_1_HI_SI:
23809 case MULTI_ARG_1_QI_DI:
23810 case MULTI_ARG_1_QI_SI:
23811 case MULTI_ARG_1_QI_HI:
23815 case MULTI_ARG_2_DI_CMP:
23816 case MULTI_ARG_2_SI_CMP:
23817 case MULTI_ARG_2_HI_CMP:
23818 case MULTI_ARG_2_QI_CMP:
23820 comparison_p = true;
23823 case MULTI_ARG_2_SF_TF:
23824 case MULTI_ARG_2_DF_TF:
23825 case MULTI_ARG_2_DI_TF:
23826 case MULTI_ARG_2_SI_TF:
23827 case MULTI_ARG_2_HI_TF:
23828 case MULTI_ARG_2_QI_TF:
23834 gcc_unreachable ();
23837 if (optimize || !target
23838 || GET_MODE (target) != tmode
23839 || !insn_data[icode].operand[0].predicate (target, tmode))
23840 target = gen_reg_rtx (tmode);
23842 gcc_assert (nargs <= 4);
23844 for (i = 0; i < nargs; i++)
23846 tree arg = CALL_EXPR_ARG (exp, i);
23847 rtx op = expand_normal (arg);
23848 int adjust = (comparison_p) ? 1 : 0;
23849 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23851 if (last_arg_constant && i == nargs-1)
23853 if (!CONST_INT_P (op))
23855 error ("last argument must be an immediate");
23856 return gen_reg_rtx (tmode);
23861 if (VECTOR_MODE_P (mode))
23862 op = safe_vector_operand (op, mode);
23864 /* If we aren't optimizing, only allow one memory operand to be
23865 generated.  */
23866 if (memory_operand (op, mode))
23869 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23872 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
23874 op = force_reg (mode, op);
23878 args[i].mode = mode;
23884 pat = GEN_FCN (icode) (target, args[0].op);
23889 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23890 GEN_INT ((int)sub_code));
23891 else if (! comparison_p)
23892 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23895 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23899 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23904 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23908 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
23912 gcc_unreachable ();
23922 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23923 insns with vec_merge. */
23926 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23930 tree arg0 = CALL_EXPR_ARG (exp, 0);
23931 rtx op1, op0 = expand_normal (arg0);
23932 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23933 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23935 if (optimize || !target
23936 || GET_MODE (target) != tmode
23937 || !insn_data[icode].operand[0].predicate (target, tmode))
23938 target = gen_reg_rtx (tmode);
23940 if (VECTOR_MODE_P (mode0))
23941 op0 = safe_vector_operand (op0, mode0);
23943 if ((optimize && !register_operand (op0, mode0))
23944 || !insn_data[icode].operand[1].predicate (op0, mode0))
23945 op0 = copy_to_mode_reg (mode0, op0);
23948 if (!insn_data[icode].operand[2].predicate (op1, mode0))
23949 op1 = copy_to_mode_reg (mode0, op1);
23951 pat = GEN_FCN (icode) (target, op0, op1);
23958 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23961 ix86_expand_sse_compare (const struct builtin_description *d,
23962 tree exp, rtx target, bool swap)
23965 tree arg0 = CALL_EXPR_ARG (exp, 0);
23966 tree arg1 = CALL_EXPR_ARG (exp, 1);
23967 rtx op0 = expand_normal (arg0);
23968 rtx op1 = expand_normal (arg1);
23970 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23971 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23972 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23973 enum rtx_code comparison = d->comparison;
23975 if (VECTOR_MODE_P (mode0))
23976 op0 = safe_vector_operand (op0, mode0);
23977 if (VECTOR_MODE_P (mode1))
23978 op1 = safe_vector_operand (op1, mode1);
23980 /* Swap operands if we have a comparison that isn't available in
23981 SSE.  */
23984 rtx tmp = gen_reg_rtx (mode1);
23985 emit_move_insn (tmp, op1);
23990 if (optimize || !target
23991 || GET_MODE (target) != tmode
23992 || !insn_data[d->icode].operand[0].predicate (target, tmode))
23993 target = gen_reg_rtx (tmode);
23995 if ((optimize && !register_operand (op0, mode0))
23996 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
23997 op0 = copy_to_mode_reg (mode0, op0);
23998 if ((optimize && !register_operand (op1, mode1))
23999 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24000 op1 = copy_to_mode_reg (mode1, op1);
24002 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24003 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24010 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24013 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24017 tree arg0 = CALL_EXPR_ARG (exp, 0);
24018 tree arg1 = CALL_EXPR_ARG (exp, 1);
24019 rtx op0 = expand_normal (arg0);
24020 rtx op1 = expand_normal (arg1);
24021 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24022 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24023 enum rtx_code comparison = d->comparison;
24025 if (VECTOR_MODE_P (mode0))
24026 op0 = safe_vector_operand (op0, mode0);
24027 if (VECTOR_MODE_P (mode1))
24028 op1 = safe_vector_operand (op1, mode1);
24030 /* Swap operands if we have a comparison that isn't available in
24031 SSE.  */
24032 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24039 target = gen_reg_rtx (SImode);
24040 emit_move_insn (target, const0_rtx);
24041 target = gen_rtx_SUBREG (QImode, target, 0);
24043 if ((optimize && !register_operand (op0, mode0))
24044 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24045 op0 = copy_to_mode_reg (mode0, op0);
24046 if ((optimize && !register_operand (op1, mode1))
24047 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24048 op1 = copy_to_mode_reg (mode1, op1);
24050 pat = GEN_FCN (d->icode) (op0, op1);
24054 emit_insn (gen_rtx_SET (VOIDmode,
24055 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24056 gen_rtx_fmt_ee (comparison, QImode,
24060 return SUBREG_REG (target);
24063 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24066 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24070 tree arg0 = CALL_EXPR_ARG (exp, 0);
24071 tree arg1 = CALL_EXPR_ARG (exp, 1);
24072 rtx op0 = expand_normal (arg0);
24073 rtx op1 = expand_normal (arg1);
24074 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24075 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24076 enum rtx_code comparison = d->comparison;
24078 if (VECTOR_MODE_P (mode0))
24079 op0 = safe_vector_operand (op0, mode0);
24080 if (VECTOR_MODE_P (mode1))
24081 op1 = safe_vector_operand (op1, mode1);
24083 target = gen_reg_rtx (SImode);
24084 emit_move_insn (target, const0_rtx);
24085 target = gen_rtx_SUBREG (QImode, target, 0);
24087 if ((optimize && !register_operand (op0, mode0))
24088 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24089 op0 = copy_to_mode_reg (mode0, op0);
24090 if ((optimize && !register_operand (op1, mode1))
24091 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24092 op1 = copy_to_mode_reg (mode1, op1);
24094 pat = GEN_FCN (d->icode) (op0, op1);
24098 emit_insn (gen_rtx_SET (VOIDmode,
24099 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24100 gen_rtx_fmt_ee (comparison, QImode,
24104 return SUBREG_REG (target);
24107 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24110 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24111 tree exp, rtx target)
24114 tree arg0 = CALL_EXPR_ARG (exp, 0);
24115 tree arg1 = CALL_EXPR_ARG (exp, 1);
24116 tree arg2 = CALL_EXPR_ARG (exp, 2);
24117 tree arg3 = CALL_EXPR_ARG (exp, 3);
24118 tree arg4 = CALL_EXPR_ARG (exp, 4);
24119 rtx scratch0, scratch1;
24120 rtx op0 = expand_normal (arg0);
24121 rtx op1 = expand_normal (arg1);
24122 rtx op2 = expand_normal (arg2);
24123 rtx op3 = expand_normal (arg3);
24124 rtx op4 = expand_normal (arg4);
24125 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24127 tmode0 = insn_data[d->icode].operand[0].mode;
24128 tmode1 = insn_data[d->icode].operand[1].mode;
24129 modev2 = insn_data[d->icode].operand[2].mode;
24130 modei3 = insn_data[d->icode].operand[3].mode;
24131 modev4 = insn_data[d->icode].operand[4].mode;
24132 modei5 = insn_data[d->icode].operand[5].mode;
24133 modeimm = insn_data[d->icode].operand[6].mode;
24135 if (VECTOR_MODE_P (modev2))
24136 op0 = safe_vector_operand (op0, modev2);
24137 if (VECTOR_MODE_P (modev4))
24138 op2 = safe_vector_operand (op2, modev4);
24140 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24141 op0 = copy_to_mode_reg (modev2, op0);
24142 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24143 op1 = copy_to_mode_reg (modei3, op1);
24144 if ((optimize && !register_operand (op2, modev4))
24145 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24146 op2 = copy_to_mode_reg (modev4, op2);
24147 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24148 op3 = copy_to_mode_reg (modei5, op3);
24150 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24152 error ("the fifth argument must be an 8-bit immediate");
24156 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24158 if (optimize || !target
24159 || GET_MODE (target) != tmode0
24160 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24161 target = gen_reg_rtx (tmode0);
24163 scratch1 = gen_reg_rtx (tmode1);
24165 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24167 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24169 if (optimize || !target
24170 || GET_MODE (target) != tmode1
24171 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24172 target = gen_reg_rtx (tmode1);
24174 scratch0 = gen_reg_rtx (tmode0);
24176 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24180 gcc_assert (d->flag);
24182 scratch0 = gen_reg_rtx (tmode0);
24183 scratch1 = gen_reg_rtx (tmode1);
24185 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24195 target = gen_reg_rtx (SImode);
24196 emit_move_insn (target, const0_rtx);
24197 target = gen_rtx_SUBREG (QImode, target, 0);
24200 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24201 gen_rtx_fmt_ee (EQ, QImode,
24202 gen_rtx_REG ((enum machine_mode) d->flag,
24205 return SUBREG_REG (target);
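/* Editorial sketch of a pcmpestr builtin this expander handles; 0x0c is
   an example mode byte (assumed here to select an equal-ordered search
   over unsigned bytes), and a non-immediate fifth argument trips the
   error above:

     typedef char __v16qi __attribute__ ((__vector_size__ (16)));

     int
     find_needle (__v16qi needle, int ln, __v16qi hay, int lh)
     {
       return __builtin_ia32_pcmpestri128 (needle, ln, hay, lh, 0x0c);
     }

   matching INT_FTYPE_V16QI_INT_V16QI_INT_INT.  */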
24212 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24215 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24216 tree exp, rtx target)
24219 tree arg0 = CALL_EXPR_ARG (exp, 0);
24220 tree arg1 = CALL_EXPR_ARG (exp, 1);
24221 tree arg2 = CALL_EXPR_ARG (exp, 2);
24222 rtx scratch0, scratch1;
24223 rtx op0 = expand_normal (arg0);
24224 rtx op1 = expand_normal (arg1);
24225 rtx op2 = expand_normal (arg2);
24226 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24228 tmode0 = insn_data[d->icode].operand[0].mode;
24229 tmode1 = insn_data[d->icode].operand[1].mode;
24230 modev2 = insn_data[d->icode].operand[2].mode;
24231 modev3 = insn_data[d->icode].operand[3].mode;
24232 modeimm = insn_data[d->icode].operand[4].mode;
24234 if (VECTOR_MODE_P (modev2))
24235 op0 = safe_vector_operand (op0, modev2);
24236 if (VECTOR_MODE_P (modev3))
24237 op1 = safe_vector_operand (op1, modev3);
24239 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24240 op0 = copy_to_mode_reg (modev2, op0);
24241 if ((optimize && !register_operand (op1, modev3))
24242 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24243 op1 = copy_to_mode_reg (modev3, op1);
24245 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24247 error ("the third argument must be an 8-bit immediate");
24251 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24253 if (optimize || !target
24254 || GET_MODE (target) != tmode0
24255 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24256 target = gen_reg_rtx (tmode0);
24258 scratch1 = gen_reg_rtx (tmode1);
24260 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24262 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24264 if (optimize || !target
24265 || GET_MODE (target) != tmode1
24266 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24267 target = gen_reg_rtx (tmode1);
24269 scratch0 = gen_reg_rtx (tmode0);
24271 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24275 gcc_assert (d->flag);
24277 scratch0 = gen_reg_rtx (tmode0);
24278 scratch1 = gen_reg_rtx (tmode1);
24280 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24290 target = gen_reg_rtx (SImode);
24291 emit_move_insn (target, const0_rtx);
24292 target = gen_rtx_SUBREG (QImode, target, 0);
24295 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24296 gen_rtx_fmt_ee (EQ, QImode,
24297 gen_rtx_REG ((enum machine_mode) d->flag,
24300 return SUBREG_REG (target);
24306 /* Subroutine of ix86_expand_builtin to take care of insns with
24307 a variable number of operands.  */
24310 ix86_expand_args_builtin (const struct builtin_description *d,
24311 tree exp, rtx target)
24313 rtx pat, real_target;
24314 unsigned int i, nargs;
24315 unsigned int nargs_constant = 0;
24316 int num_memory = 0;
24320 enum machine_mode mode;
24322 bool last_arg_count = false;
24323 enum insn_code icode = d->icode;
24324 const struct insn_data_d *insn_p = &insn_data[icode];
24325 enum machine_mode tmode = insn_p->operand[0].mode;
24326 enum machine_mode rmode = VOIDmode;
24328 enum rtx_code comparison = d->comparison;
24330 switch ((enum ix86_builtin_func_type) d->flag)
24332 case INT_FTYPE_V8SF_V8SF_PTEST:
24333 case INT_FTYPE_V4DI_V4DI_PTEST:
24334 case INT_FTYPE_V4DF_V4DF_PTEST:
24335 case INT_FTYPE_V4SF_V4SF_PTEST:
24336 case INT_FTYPE_V2DI_V2DI_PTEST:
24337 case INT_FTYPE_V2DF_V2DF_PTEST:
24338 return ix86_expand_sse_ptest (d, exp, target);
24339 case FLOAT128_FTYPE_FLOAT128:
24340 case FLOAT_FTYPE_FLOAT:
24341 case INT_FTYPE_INT:
24342 case UINT64_FTYPE_INT:
24343 case UINT16_FTYPE_UINT16:
24344 case INT64_FTYPE_INT64:
24345 case INT64_FTYPE_V4SF:
24346 case INT64_FTYPE_V2DF:
24347 case INT_FTYPE_V16QI:
24348 case INT_FTYPE_V8QI:
24349 case INT_FTYPE_V8SF:
24350 case INT_FTYPE_V4DF:
24351 case INT_FTYPE_V4SF:
24352 case INT_FTYPE_V2DF:
24353 case V16QI_FTYPE_V16QI:
24354 case V8SI_FTYPE_V8SF:
24355 case V8SI_FTYPE_V4SI:
24356 case V8HI_FTYPE_V8HI:
24357 case V8HI_FTYPE_V16QI:
24358 case V8QI_FTYPE_V8QI:
24359 case V8SF_FTYPE_V8SF:
24360 case V8SF_FTYPE_V8SI:
24361 case V8SF_FTYPE_V4SF:
24362 case V8SF_FTYPE_V8HI:
24363 case V4SI_FTYPE_V4SI:
24364 case V4SI_FTYPE_V16QI:
24365 case V4SI_FTYPE_V4SF:
24366 case V4SI_FTYPE_V8SI:
24367 case V4SI_FTYPE_V8HI:
24368 case V4SI_FTYPE_V4DF:
24369 case V4SI_FTYPE_V2DF:
24370 case V4HI_FTYPE_V4HI:
24371 case V4DF_FTYPE_V4DF:
24372 case V4DF_FTYPE_V4SI:
24373 case V4DF_FTYPE_V4SF:
24374 case V4DF_FTYPE_V2DF:
24375 case V4SF_FTYPE_V4SF:
24376 case V4SF_FTYPE_V4SI:
24377 case V4SF_FTYPE_V8SF:
24378 case V4SF_FTYPE_V4DF:
24379 case V4SF_FTYPE_V8HI:
24380 case V4SF_FTYPE_V2DF:
24381 case V2DI_FTYPE_V2DI:
24382 case V2DI_FTYPE_V16QI:
24383 case V2DI_FTYPE_V8HI:
24384 case V2DI_FTYPE_V4SI:
24385 case V2DF_FTYPE_V2DF:
24386 case V2DF_FTYPE_V4SI:
24387 case V2DF_FTYPE_V4DF:
24388 case V2DF_FTYPE_V4SF:
24389 case V2DF_FTYPE_V2SI:
24390 case V2SI_FTYPE_V2SI:
24391 case V2SI_FTYPE_V4SF:
24392 case V2SI_FTYPE_V2SF:
24393 case V2SI_FTYPE_V2DF:
24394 case V2SF_FTYPE_V2SF:
24395 case V2SF_FTYPE_V2SI:
24398 case V4SF_FTYPE_V4SF_VEC_MERGE:
24399 case V2DF_FTYPE_V2DF_VEC_MERGE:
24400 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24401 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24402 case V16QI_FTYPE_V16QI_V16QI:
24403 case V16QI_FTYPE_V8HI_V8HI:
24404 case V8QI_FTYPE_V8QI_V8QI:
24405 case V8QI_FTYPE_V4HI_V4HI:
24406 case V8HI_FTYPE_V8HI_V8HI:
24407 case V8HI_FTYPE_V16QI_V16QI:
24408 case V8HI_FTYPE_V4SI_V4SI:
24409 case V8SF_FTYPE_V8SF_V8SF:
24410 case V8SF_FTYPE_V8SF_V8SI:
24411 case V4SI_FTYPE_V4SI_V4SI:
24412 case V4SI_FTYPE_V8HI_V8HI:
24413 case V4SI_FTYPE_V4SF_V4SF:
24414 case V4SI_FTYPE_V2DF_V2DF:
24415 case V4HI_FTYPE_V4HI_V4HI:
24416 case V4HI_FTYPE_V8QI_V8QI:
24417 case V4HI_FTYPE_V2SI_V2SI:
24418 case V4DF_FTYPE_V4DF_V4DF:
24419 case V4DF_FTYPE_V4DF_V4DI:
24420 case V4SF_FTYPE_V4SF_V4SF:
24421 case V4SF_FTYPE_V4SF_V4SI:
24422 case V4SF_FTYPE_V4SF_V2SI:
24423 case V4SF_FTYPE_V4SF_V2DF:
24424 case V4SF_FTYPE_V4SF_DI:
24425 case V4SF_FTYPE_V4SF_SI:
24426 case V2DI_FTYPE_V2DI_V2DI:
24427 case V2DI_FTYPE_V16QI_V16QI:
24428 case V2DI_FTYPE_V4SI_V4SI:
24429 case V2DI_FTYPE_V2DI_V16QI:
24430 case V2DI_FTYPE_V2DF_V2DF:
24431 case V2SI_FTYPE_V2SI_V2SI:
24432 case V2SI_FTYPE_V4HI_V4HI:
24433 case V2SI_FTYPE_V2SF_V2SF:
24434 case V2DF_FTYPE_V2DF_V2DF:
24435 case V2DF_FTYPE_V2DF_V4SF:
24436 case V2DF_FTYPE_V2DF_V2DI:
24437 case V2DF_FTYPE_V2DF_DI:
24438 case V2DF_FTYPE_V2DF_SI:
24439 case V2SF_FTYPE_V2SF_V2SF:
24440 case V1DI_FTYPE_V1DI_V1DI:
24441 case V1DI_FTYPE_V8QI_V8QI:
24442 case V1DI_FTYPE_V2SI_V2SI:
24443 if (comparison == UNKNOWN)
24444 return ix86_expand_binop_builtin (icode, exp, target);
24447 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24448 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24449 gcc_assert (comparison != UNKNOWN);
24453 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24454 case V8HI_FTYPE_V8HI_SI_COUNT:
24455 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24456 case V4SI_FTYPE_V4SI_SI_COUNT:
24457 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24458 case V4HI_FTYPE_V4HI_SI_COUNT:
24459 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24460 case V2DI_FTYPE_V2DI_SI_COUNT:
24461 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24462 case V2SI_FTYPE_V2SI_SI_COUNT:
24463 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24464 case V1DI_FTYPE_V1DI_SI_COUNT:
24466 last_arg_count = true;
24468 case UINT64_FTYPE_UINT64_UINT64:
24469 case UINT_FTYPE_UINT_UINT:
24470 case UINT_FTYPE_UINT_USHORT:
24471 case UINT_FTYPE_UINT_UCHAR:
24472 case UINT16_FTYPE_UINT16_INT:
24473 case UINT8_FTYPE_UINT8_INT:
24476 case V2DI_FTYPE_V2DI_INT_CONVERT:
24479 nargs_constant = 1;
24481 case V8HI_FTYPE_V8HI_INT:
24482 case V8HI_FTYPE_V8SF_INT:
24483 case V8HI_FTYPE_V4SF_INT:
24484 case V8SF_FTYPE_V8SF_INT:
24485 case V4SI_FTYPE_V4SI_INT:
24486 case V4SI_FTYPE_V8SI_INT:
24487 case V4HI_FTYPE_V4HI_INT:
24488 case V4DF_FTYPE_V4DF_INT:
24489 case V4SF_FTYPE_V4SF_INT:
24490 case V4SF_FTYPE_V8SF_INT:
24491 case V2DI_FTYPE_V2DI_INT:
24492 case V2DF_FTYPE_V2DF_INT:
24493 case V2DF_FTYPE_V4DF_INT:
24495 nargs_constant = 1;
24497 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24498 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24499 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24500 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24501 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24504 case V16QI_FTYPE_V16QI_V16QI_INT:
24505 case V8HI_FTYPE_V8HI_V8HI_INT:
24506 case V8SI_FTYPE_V8SI_V8SI_INT:
24507 case V8SI_FTYPE_V8SI_V4SI_INT:
24508 case V8SF_FTYPE_V8SF_V8SF_INT:
24509 case V8SF_FTYPE_V8SF_V4SF_INT:
24510 case V4SI_FTYPE_V4SI_V4SI_INT:
24511 case V4DF_FTYPE_V4DF_V4DF_INT:
24512 case V4DF_FTYPE_V4DF_V2DF_INT:
24513 case V4SF_FTYPE_V4SF_V4SF_INT:
24514 case V2DI_FTYPE_V2DI_V2DI_INT:
24515 case V2DF_FTYPE_V2DF_V2DF_INT:
24517 nargs_constant = 1;
24519 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24522 nargs_constant = 1;
24524 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24527 nargs_constant = 1;
24529 case V2DI_FTYPE_V2DI_UINT_UINT:
24531 nargs_constant = 2;
24533 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24534 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24535 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24536 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24538 nargs_constant = 1;
24540 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24542 nargs_constant = 2;
24545 gcc_unreachable ();
24548 gcc_assert (nargs <= ARRAY_SIZE (args));
24550 if (comparison != UNKNOWN)
24552 gcc_assert (nargs == 2);
24553 return ix86_expand_sse_compare (d, exp, target, swap);
24556 if (rmode == VOIDmode || rmode == tmode)
24560 || GET_MODE (target) != tmode
24561 || !insn_p->operand[0].predicate (target, tmode))
24562 target = gen_reg_rtx (tmode);
24563 real_target = target;
24567 target = gen_reg_rtx (rmode);
24568 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24571 for (i = 0; i < nargs; i++)
24573 tree arg = CALL_EXPR_ARG (exp, i);
24574 rtx op = expand_normal (arg);
24575 enum machine_mode mode = insn_p->operand[i + 1].mode;
24576 bool match = insn_p->operand[i + 1].predicate (op, mode);
24578 if (last_arg_count && (i + 1) == nargs)
24580 /* SIMD shift insns take either an 8-bit immediate or a
24581 register as the count, but the builtin functions take an int.
24582 If the count doesn't match, we put it in a register.  */
24585 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24586 if (!insn_p->operand[i + 1].predicate (op, mode))
24587 op = copy_to_reg (op);
24590 else if ((nargs - i) <= nargs_constant)
24595 case CODE_FOR_sse4_1_roundpd:
24596 case CODE_FOR_sse4_1_roundps:
24597 case CODE_FOR_sse4_1_roundsd:
24598 case CODE_FOR_sse4_1_roundss:
24599 case CODE_FOR_sse4_1_blendps:
24600 case CODE_FOR_avx_blendpd256:
24601 case CODE_FOR_avx_vpermilv4df:
24602 case CODE_FOR_avx_roundpd256:
24603 case CODE_FOR_avx_roundps256:
24604 error ("the last argument must be a 4-bit immediate");
24607 case CODE_FOR_sse4_1_blendpd:
24608 case CODE_FOR_avx_vpermilv2df:
24609 case CODE_FOR_xop_vpermil2v2df3:
24610 case CODE_FOR_xop_vpermil2v4sf3:
24611 case CODE_FOR_xop_vpermil2v4df3:
24612 case CODE_FOR_xop_vpermil2v8sf3:
24613 error ("the last argument must be a 2-bit immediate");
24616 case CODE_FOR_avx_vextractf128v4df:
24617 case CODE_FOR_avx_vextractf128v8sf:
24618 case CODE_FOR_avx_vextractf128v8si:
24619 case CODE_FOR_avx_vinsertf128v4df:
24620 case CODE_FOR_avx_vinsertf128v8sf:
24621 case CODE_FOR_avx_vinsertf128v8si:
24622 error ("the last argument must be a 1-bit immediate");
24625 case CODE_FOR_avx_cmpsdv2df3:
24626 case CODE_FOR_avx_cmpssv4sf3:
24627 case CODE_FOR_avx_cmppdv2df3:
24628 case CODE_FOR_avx_cmppsv4sf3:
24629 case CODE_FOR_avx_cmppdv4df3:
24630 case CODE_FOR_avx_cmppsv8sf3:
24631 error ("the last argument must be a 5-bit immediate");
24635 switch (nargs_constant)
24638 if ((nargs - i) == nargs_constant)
24640 error ("the next to last argument must be an 8-bit immediate");
24644 error ("the last argument must be an 8-bit immediate");
24647 gcc_unreachable ();
24654 if (VECTOR_MODE_P (mode))
24655 op = safe_vector_operand (op, mode);
24657 /* If we aren't optimizing, only allow one memory operand to
24658 be generated.  */
24659 if (memory_operand (op, mode))
24662 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24664 if (optimize || !match || num_memory > 1)
24665 op = copy_to_mode_reg (mode, op);
24669 op = copy_to_reg (op);
24670 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24675 args[i].mode = mode;
24681 pat = GEN_FCN (icode) (real_target, args[0].op);
24684 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24687 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24691 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24692 args[2].op, args[3].op);
24695 gcc_unreachable ();
24705 /* Subroutine of ix86_expand_builtin to take care of special insns
24706 with a variable number of operands.  */
24709 ix86_expand_special_args_builtin (const struct builtin_description *d,
24710 tree exp, rtx target)
24714 unsigned int i, nargs, arg_adjust, memory;
24718 enum machine_mode mode;
24720 enum insn_code icode = d->icode;
24721 bool last_arg_constant = false;
24722 const struct insn_data_d *insn_p = &insn_data[icode];
24723 enum machine_mode tmode = insn_p->operand[0].mode;
24724 enum { load, store } klass;
24726 switch ((enum ix86_builtin_func_type) d->flag)
24728 case VOID_FTYPE_VOID:
24729 emit_insn (GEN_FCN (icode) (target));
24731 case VOID_FTYPE_UINT64:
24732 case VOID_FTYPE_UNSIGNED:
24738 case UINT64_FTYPE_VOID:
24739 case UNSIGNED_FTYPE_VOID:
24740 case UINT16_FTYPE_VOID:
24745 case UINT64_FTYPE_PUNSIGNED:
24746 case V2DI_FTYPE_PV2DI:
24747 case V32QI_FTYPE_PCCHAR:
24748 case V16QI_FTYPE_PCCHAR:
24749 case V8SF_FTYPE_PCV4SF:
24750 case V8SF_FTYPE_PCFLOAT:
24751 case V4SF_FTYPE_PCFLOAT:
24752 case V4DF_FTYPE_PCV2DF:
24753 case V4DF_FTYPE_PCDOUBLE:
24754 case V2DF_FTYPE_PCDOUBLE:
24755 case VOID_FTYPE_PVOID:
24760 case VOID_FTYPE_PV2SF_V4SF:
24761 case VOID_FTYPE_PV4DI_V4DI:
24762 case VOID_FTYPE_PV2DI_V2DI:
24763 case VOID_FTYPE_PCHAR_V32QI:
24764 case VOID_FTYPE_PCHAR_V16QI:
24765 case VOID_FTYPE_PFLOAT_V8SF:
24766 case VOID_FTYPE_PFLOAT_V4SF:
24767 case VOID_FTYPE_PDOUBLE_V4DF:
24768 case VOID_FTYPE_PDOUBLE_V2DF:
24769 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24770 case VOID_FTYPE_PINT_INT:
24773 /* Reserve memory operand for target. */
24774 memory = ARRAY_SIZE (args);
24776 case V4SF_FTYPE_V4SF_PCV2SF:
24777 case V2DF_FTYPE_V2DF_PCDOUBLE:
24782 case V8SF_FTYPE_PCV8SF_V8SF:
24783 case V4DF_FTYPE_PCV4DF_V4DF:
24784 case V4SF_FTYPE_PCV4SF_V4SF:
24785 case V2DF_FTYPE_PCV2DF_V2DF:
24790 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24791 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24792 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24793 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24796 /* Reserve memory operand for target. */
24797 memory = ARRAY_SIZE (args);
24799 case VOID_FTYPE_UINT_UINT_UINT:
24800 case VOID_FTYPE_UINT64_UINT_UINT:
24801 case UCHAR_FTYPE_UINT_UINT_UINT:
24802 case UCHAR_FTYPE_UINT64_UINT_UINT:
24805 memory = ARRAY_SIZE (args);
24806 last_arg_constant = true;
24809 gcc_unreachable ();
24812 gcc_assert (nargs <= ARRAY_SIZE (args));
24814 if (klass == store)
24816 arg = CALL_EXPR_ARG (exp, 0);
24817 op = expand_normal (arg);
24818 gcc_assert (target == 0);
24820 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24822 target = force_reg (tmode, op);
24830 || GET_MODE (target) != tmode
24831 || !insn_p->operand[0].predicate (target, tmode))
24832 target = gen_reg_rtx (tmode);
24835 for (i = 0; i < nargs; i++)
24837 enum machine_mode mode = insn_p->operand[i + 1].mode;
24840 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24841 op = expand_normal (arg);
24842 match = insn_p->operand[i + 1].predicate (op, mode);
24844 if (last_arg_constant && (i + 1) == nargs)
24848 if (icode == CODE_FOR_lwp_lwpvalsi3
24849 || icode == CODE_FOR_lwp_lwpinssi3
24850 || icode == CODE_FOR_lwp_lwpvaldi3
24851 || icode == CODE_FOR_lwp_lwpinsdi3)
24852 error ("the last argument must be a 32-bit immediate");
24854 error ("the last argument must be an 8-bit immediate");
24862 /* This must be the memory operand. */
24863 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24864 gcc_assert (GET_MODE (op) == mode
24865 || GET_MODE (op) == VOIDmode);
24869 /* This must be a register.  */
24870 if (VECTOR_MODE_P (mode))
24871 op = safe_vector_operand (op, mode);
24873 gcc_assert (GET_MODE (op) == mode
24874 || GET_MODE (op) == VOIDmode);
24875 op = copy_to_mode_reg (mode, op);
24880 args[i].mode = mode;
24886 pat = GEN_FCN (icode) (target);
24889 pat = GEN_FCN (icode) (target, args[0].op);
24892 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24895 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24898 gcc_unreachable ();
24904 return klass == store ? 0 : target;
24907 /* Return the integer constant in ARG. Constrain it to be in the range
24908 of the subparts of VEC_TYPE; issue an error if not. */
24911 get_element_number (tree vec_type, tree arg)
24913 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24915 if (!host_integerp (arg, 1)
24916 || (elt = tree_low_cst (arg, 1), elt > max))
24918 error ("selector must be an integer constant in the range 0..%wi", max);
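/* Editorial example: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so
   __builtin_ia32_vec_ext_v4sf (v, 4) is rejected with "selector must be
   an integer constant in the range 0..3", while selectors 0 through 3
   are accepted.  */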
24925 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24926 ix86_expand_vector_init. We DO have language-level syntax for this, in
24927 the form of (type){ init-list }. Except that since we can't place emms
24928 instructions from inside the compiler, we can't allow the use of MMX
24929 registers unless the user explicitly asks for it. So we do *not* define
24930 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24931 we have builtins invoked by mmintrin.h that give us license to emit
24932 these sorts of instructions. */
24935 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24937 enum machine_mode tmode = TYPE_MODE (type);
24938 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24939 int i, n_elt = GET_MODE_NUNITS (tmode);
24940 rtvec v = rtvec_alloc (n_elt);
24942 gcc_assert (VECTOR_MODE_P (tmode));
24943 gcc_assert (call_expr_nargs (exp) == n_elt);
24945 for (i = 0; i < n_elt; ++i)
24947 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24948 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24951 if (!target || !register_operand (target, tmode))
24952 target = gen_reg_rtx (tmode);
24954 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24958 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24959 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24960 had a language-level syntax for referencing vector elements. */
24963 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24965 enum machine_mode tmode, mode0;
24970 arg0 = CALL_EXPR_ARG (exp, 0);
24971 arg1 = CALL_EXPR_ARG (exp, 1);
24973 op0 = expand_normal (arg0);
24974 elt = get_element_number (TREE_TYPE (arg0), arg1);
24976 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24977 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24978 gcc_assert (VECTOR_MODE_P (mode0));
24980 op0 = force_reg (mode0, op0);
24982 if (optimize || !target || !register_operand (target, tmode))
24983 target = gen_reg_rtx (tmode);
24985 ix86_expand_vector_extract (true, target, op0, elt);
24990 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24991 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24992 a language-level syntax for referencing vector elements. */
24995 ix86_expand_vec_set_builtin (tree exp)
24997 enum machine_mode tmode, mode1;
24998 tree arg0, arg1, arg2;
25000 rtx op0, op1, target;
25002 arg0 = CALL_EXPR_ARG (exp, 0);
25003 arg1 = CALL_EXPR_ARG (exp, 1);
25004 arg2 = CALL_EXPR_ARG (exp, 2);
25006 tmode = TYPE_MODE (TREE_TYPE (arg0));
25007 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25008 gcc_assert (VECTOR_MODE_P (tmode));
25010 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25011 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25012 elt = get_element_number (TREE_TYPE (arg0), arg2);
25014 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25015 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25017 op0 = force_reg (tmode, op0);
25018 op1 = force_reg (mode1, op1);
25020 /* OP0 is the source of these builtin functions and shouldn't be
25021 modified. Create a copy, use it and return it as target. */
25022 target = gen_reg_rtx (tmode);
25023 emit_move_insn (target, op0);
25024 ix86_expand_vector_set (true, target, op1, elt);
25029 /* Expand an expression EXP that calls a built-in function,
25030 with result going to TARGET if that's convenient
25031 (and in mode MODE if that's convenient).
25032 SUBTARGET may be used as the target for computing one of EXP's operands.
25033 IGNORE is nonzero if the value is to be ignored. */
25036 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25037 enum machine_mode mode ATTRIBUTE_UNUSED,
25038 int ignore ATTRIBUTE_UNUSED)
25040 const struct builtin_description *d;
25042 enum insn_code icode;
25043 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25044 tree arg0, arg1, arg2;
25045 rtx op0, op1, op2, pat;
25046 enum machine_mode mode0, mode1, mode2;
25047 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25049 /* Determine whether the builtin function is available under the current ISA.
25050 Originally the builtin was not created if it wasn't applicable to the
25051 current ISA based on the command line switches. With function specific
25052 options, we need to check in the context of the function making the call
25053 whether it is supported. */
25054 if (ix86_builtins_isa[fcode].isa
25055 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25057 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25058 NULL, NULL, false);
25061 error ("%qE needs unknown isa option", fndecl);
25064 gcc_assert (opts != NULL);
25065 error ("%qE needs isa option %s", fndecl, opts);
25073 case IX86_BUILTIN_MASKMOVQ:
25074 case IX86_BUILTIN_MASKMOVDQU:
25075 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25076 ? CODE_FOR_mmx_maskmovq
25077 : CODE_FOR_sse2_maskmovdqu);
25078 /* Note the arg order is different from the operand order. */
25079 arg1 = CALL_EXPR_ARG (exp, 0);
25080 arg2 = CALL_EXPR_ARG (exp, 1);
25081 arg0 = CALL_EXPR_ARG (exp, 2);
25082 op0 = expand_normal (arg0);
25083 op1 = expand_normal (arg1);
25084 op2 = expand_normal (arg2);
25085 mode0 = insn_data[icode].operand[0].mode;
25086 mode1 = insn_data[icode].operand[1].mode;
25087 mode2 = insn_data[icode].operand[2].mode;
25089 op0 = force_reg (Pmode, op0);
25090 op0 = gen_rtx_MEM (mode1, op0);
25092 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25093 op0 = copy_to_mode_reg (mode0, op0);
25094 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25095 op1 = copy_to_mode_reg (mode1, op1);
25096 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25097 op2 = copy_to_mode_reg (mode2, op2);
25098 pat = GEN_FCN (icode) (op0, op1, op2);
25099 if (!pat)
25100   return 0;
25101 emit_insn (pat);
25102 return 0;
25104 case IX86_BUILTIN_LDMXCSR:
25105 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25106 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25107 emit_move_insn (target, op0);
25108 emit_insn (gen_sse_ldmxcsr (target));
25111 case IX86_BUILTIN_STMXCSR:
25112 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25113 emit_insn (gen_sse_stmxcsr (target));
25114 return copy_to_mode_reg (SImode, target);
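/* These two cases back _mm_setcsr and _mm_getcsr from <xmmintrin.h>.
   For illustration, the following round-trips the MXCSR register
   (assuming SSE is enabled; 0x8040 sets the FTZ and DAZ bits):

     unsigned int csr = __builtin_ia32_stmxcsr ();
     __builtin_ia32_ldmxcsr (csr | 0x8040);

   The stack slot is needed because the ldmxcsr and stmxcsr insns
   only take memory operands.  */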
25116 case IX86_BUILTIN_CLFLUSH:
25117 arg0 = CALL_EXPR_ARG (exp, 0);
25118 op0 = expand_normal (arg0);
25119 icode = CODE_FOR_sse2_clflush;
25120 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25121 op0 = copy_to_mode_reg (Pmode, op0);
25123 emit_insn (gen_sse2_clflush (op0));
25126 case IX86_BUILTIN_MONITOR:
25127 arg0 = CALL_EXPR_ARG (exp, 0);
25128 arg1 = CALL_EXPR_ARG (exp, 1);
25129 arg2 = CALL_EXPR_ARG (exp, 2);
25130 op0 = expand_normal (arg0);
25131 op1 = expand_normal (arg1);
25132 op2 = expand_normal (arg2);
25134 op0 = copy_to_mode_reg (Pmode, op0);
25136 op1 = copy_to_mode_reg (SImode, op1);
25138 op2 = copy_to_mode_reg (SImode, op2);
25139 emit_insn (ix86_gen_monitor (op0, op1, op2));
25142 case IX86_BUILTIN_MWAIT:
25143 arg0 = CALL_EXPR_ARG (exp, 0);
25144 arg1 = CALL_EXPR_ARG (exp, 1);
25145 op0 = expand_normal (arg0);
25146 op1 = expand_normal (arg1);
25148 op0 = copy_to_mode_reg (SImode, op0);
25150 op1 = copy_to_mode_reg (SImode, op1);
25151 emit_insn (gen_sse3_mwait (op0, op1));
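/* For illustration, the two cases above implement the monitor/mwait
   idiom exposed by <pmmintrin.h> (assuming -msse3):

     volatile int flag;
     __builtin_ia32_monitor ((void *) &flag, 0, 0);
     if (!flag)
       __builtin_ia32_mwait (0, 0);

   The second and third arguments are the extension and hint words,
   currently always zero.  */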
25154 case IX86_BUILTIN_VEC_INIT_V2SI:
25155 case IX86_BUILTIN_VEC_INIT_V4HI:
25156 case IX86_BUILTIN_VEC_INIT_V8QI:
25157 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25159 case IX86_BUILTIN_VEC_EXT_V2DF:
25160 case IX86_BUILTIN_VEC_EXT_V2DI:
25161 case IX86_BUILTIN_VEC_EXT_V4SF:
25162 case IX86_BUILTIN_VEC_EXT_V4SI:
25163 case IX86_BUILTIN_VEC_EXT_V8HI:
25164 case IX86_BUILTIN_VEC_EXT_V2SI:
25165 case IX86_BUILTIN_VEC_EXT_V4HI:
25166 case IX86_BUILTIN_VEC_EXT_V16QI:
25167 return ix86_expand_vec_ext_builtin (exp, target);
25169 case IX86_BUILTIN_VEC_SET_V2DI:
25170 case IX86_BUILTIN_VEC_SET_V4SF:
25171 case IX86_BUILTIN_VEC_SET_V4SI:
25172 case IX86_BUILTIN_VEC_SET_V8HI:
25173 case IX86_BUILTIN_VEC_SET_V4HI:
25174 case IX86_BUILTIN_VEC_SET_V16QI:
25175 return ix86_expand_vec_set_builtin (exp);
25177 case IX86_BUILTIN_VEC_PERM_V2DF:
25178 case IX86_BUILTIN_VEC_PERM_V4SF:
25179 case IX86_BUILTIN_VEC_PERM_V2DI:
25180 case IX86_BUILTIN_VEC_PERM_V4SI:
25181 case IX86_BUILTIN_VEC_PERM_V8HI:
25182 case IX86_BUILTIN_VEC_PERM_V16QI:
25183 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25184 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25185 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25186 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25187 case IX86_BUILTIN_VEC_PERM_V4DF:
25188 case IX86_BUILTIN_VEC_PERM_V8SF:
25189 return ix86_expand_vec_perm_builtin (exp);
25191 case IX86_BUILTIN_INFQ:
25192 case IX86_BUILTIN_HUGE_VALQ:
25193 {
25194   REAL_VALUE_TYPE inf;
25195   rtx tmp;
25196
25197   real_inf (&inf);
25198   tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25199
25200   tmp = validize_mem (force_const_mem (mode, tmp));
25201
25202   if (target == NULL_RTX)
25203     target = gen_reg_rtx (mode);
25204
25205   emit_move_insn (target, tmp);
25206   return target;
25207 }
25209 case IX86_BUILTIN_LLWPCB:
25210 arg0 = CALL_EXPR_ARG (exp, 0);
25211 op0 = expand_normal (arg0);
25212 icode = CODE_FOR_lwp_llwpcb;
25213 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25214 op0 = copy_to_mode_reg (Pmode, op0);
25215 emit_insn (gen_lwp_llwpcb (op0));
25218 case IX86_BUILTIN_SLWPCB:
25219 icode = CODE_FOR_lwp_slwpcb;
25220 if (!target
25221     || !insn_data[icode].operand[0].predicate (target, Pmode))
25222   target = gen_reg_rtx (Pmode);
25223 emit_insn (gen_lwp_slwpcb (target));
25224 return target;
25230 for (i = 0, d = bdesc_special_args;
25231      i < ARRAY_SIZE (bdesc_special_args);
25232      i++, d++)
25233 if (d->code == fcode)
25234 return ix86_expand_special_args_builtin (d, exp, target);
25236 for (i = 0, d = bdesc_args;
25237      i < ARRAY_SIZE (bdesc_args);
25238      i++, d++)
25239 if (d->code == fcode)
25240   switch (fcode)
25241     {
25242     case IX86_BUILTIN_FABSQ:
25243     case IX86_BUILTIN_COPYSIGNQ:
25244       if (!TARGET_SSE2)
25245         /* Emit a normal call if SSE2 isn't available.  */
25246         return expand_call (exp, target, ignore);
25247     default:
25248       return ix86_expand_args_builtin (d, exp, target);
25249     }
25251 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25252 if (d->code == fcode)
25253 return ix86_expand_sse_comi (d, exp, target);
25255 for (i = 0, d = bdesc_pcmpestr;
25256      i < ARRAY_SIZE (bdesc_pcmpestr);
25257      i++, d++)
25258 if (d->code == fcode)
25259 return ix86_expand_sse_pcmpestr (d, exp, target);
25261 for (i = 0, d = bdesc_pcmpistr;
25262      i < ARRAY_SIZE (bdesc_pcmpistr);
25263      i++, d++)
25264 if (d->code == fcode)
25265 return ix86_expand_sse_pcmpistr (d, exp, target);
25267 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25268 if (d->code == fcode)
25269 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25270 (enum ix86_builtin_func_type)
25271 d->flag, d->comparison);
25273 gcc_unreachable ();
25276 /* Returns a function decl for a vectorized version of the builtin function
25277 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25278 if it is not available. */
25281 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25284 enum machine_mode in_mode, out_mode;
25285 int in_n, out_n;
25286 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25288 if (TREE_CODE (type_out) != VECTOR_TYPE
25289 || TREE_CODE (type_in) != VECTOR_TYPE
25290     || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25291   return NULL_TREE;
25293 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25294 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25295 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25296 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25297
25298 switch (fn)
25299   {
25300 case BUILT_IN_SQRT:
25301 if (out_mode == DFmode && out_n == 2
25302 && in_mode == DFmode && in_n == 2)
25303 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25306 case BUILT_IN_SQRTF:
25307 if (out_mode == SFmode && out_n == 4
25308 && in_mode == SFmode && in_n == 4)
25309 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25312 case BUILT_IN_LRINT:
25313 if (out_mode == SImode && out_n == 4
25314 && in_mode == DFmode && in_n == 2)
25315 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25318 case BUILT_IN_LRINTF:
25319 if (out_mode == SImode && out_n == 4
25320 && in_mode == SFmode && in_n == 4)
25321 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25324 case BUILT_IN_COPYSIGN:
25325 if (out_mode == DFmode && out_n == 2
25326 && in_mode == DFmode && in_n == 2)
25327 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25330 case BUILT_IN_COPYSIGNF:
25331 if (out_mode == SFmode && out_n == 4
25332 && in_mode == SFmode && in_n == 4)
25333 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25340 /* Dispatch to a handler for a vectorization library. */
25341 if (ix86_veclib_handler)
25342 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25348 /* Handler for an SVML-style interface to
25349 a library with vectorized intrinsics. */
25351 static tree
25352 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25353 {
25354   char name[20];
25355   tree fntype, new_fndecl, args;
25356   unsigned arity;
25357   const char *bname;
25358   enum machine_mode el_mode, in_mode;
25359   int n, in_n;
25361 /* SVML is suitable only for unsafe math.  */
25362 if (!flag_unsafe_math_optimizations)
25363   return NULL_TREE;
25365 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25366 n = TYPE_VECTOR_SUBPARTS (type_out);
25367 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25368 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25369 if (el_mode != in_mode
25370     || n != in_n)
25371   return NULL_TREE;
25372
25373 switch (fn)
25374   {
25377 case BUILT_IN_LOG10:
25379 case BUILT_IN_TANH:
25381 case BUILT_IN_ATAN:
25382 case BUILT_IN_ATAN2:
25383 case BUILT_IN_ATANH:
25384 case BUILT_IN_CBRT:
25385 case BUILT_IN_SINH:
25387 case BUILT_IN_ASINH:
25388 case BUILT_IN_ASIN:
25389 case BUILT_IN_COSH:
25391 case BUILT_IN_ACOSH:
25392 case BUILT_IN_ACOS:
25393 if (el_mode != DFmode || n != 2)
25397 case BUILT_IN_EXPF:
25398 case BUILT_IN_LOGF:
25399 case BUILT_IN_LOG10F:
25400 case BUILT_IN_POWF:
25401 case BUILT_IN_TANHF:
25402 case BUILT_IN_TANF:
25403 case BUILT_IN_ATANF:
25404 case BUILT_IN_ATAN2F:
25405 case BUILT_IN_ATANHF:
25406 case BUILT_IN_CBRTF:
25407 case BUILT_IN_SINHF:
25408 case BUILT_IN_SINF:
25409 case BUILT_IN_ASINHF:
25410 case BUILT_IN_ASINF:
25411 case BUILT_IN_COSHF:
25412 case BUILT_IN_COSF:
25413 case BUILT_IN_ACOSHF:
25414 case BUILT_IN_ACOSF:
25415 if (el_mode != SFmode || n != 4)
25423 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25425 if (fn == BUILT_IN_LOGF)
25426 strcpy (name, "vmlsLn4");
25427 else if (fn == BUILT_IN_LOG)
25428 strcpy (name, "vmldLn2");
25431 sprintf (name, "vmls%s", bname+10);
25432 name[strlen (name)-1] = '4';
25435 sprintf (name, "vmld%s2", bname+10);
25437 /* Convert to uppercase. */
25440 arity = 0;
25441 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25442      args = TREE_CHAIN (args))
25443   arity++;
25444
25445 if (arity == 1)
25446   fntype = build_function_type_list (type_out, type_in, NULL);
25447 else
25448   fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25450 /* Build a function declaration for the vectorized function. */
25451 new_fndecl = build_decl (BUILTINS_LOCATION,
25452 FUNCTION_DECL, get_identifier (name), fntype);
25453 TREE_PUBLIC (new_fndecl) = 1;
25454 DECL_EXTERNAL (new_fndecl) = 1;
25455 DECL_IS_NOVOPS (new_fndecl) = 1;
25456 TREE_READONLY (new_fndecl) = 1;
25457
25458 return new_fndecl;
25459 }
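/* For illustration, the mangling above yields SVML entry points such as
   (assuming a vectorization factor of 2 for double and 4 for float):

     sin  -> vmldSin2      sinf -> vmlsSin4
     log  -> vmldLn2       logf -> vmlsLn4
     pow  -> vmldPow2      powf -> vmlsPow4

   These are selected by -mveclibabi=svml and must be resolved by an
   SVML-compatible library at link time.  */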
25461 /* Handler for an ACML-style interface to
25462 a library with vectorized intrinsics. */
25464 static tree
25465 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25466 {
25467   char name[20] = "__vr.._";
25468   tree fntype, new_fndecl, args;
25469   unsigned arity;
25470   const char *bname;
25471   enum machine_mode el_mode, in_mode;
25472   int n, in_n;
25474 /* ACML is 64-bit only and suitable only for unsafe math, since it
25475    does not correctly support parts of IEEE arithmetic, such as
25476    denormals, with the required precision.  */
25477 if (!TARGET_64BIT
25478     || !flag_unsafe_math_optimizations)
25479   return NULL_TREE;
25481 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25482 n = TYPE_VECTOR_SUBPARTS (type_out);
25483 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25484 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25485 if (el_mode != in_mode
25486     || n != in_n)
25487   return NULL_TREE;
25488
25489 switch (fn)
25490   {
25495 case BUILT_IN_LOG2:
25496 case BUILT_IN_LOG10:
25499 if (el_mode != DFmode
25504 case BUILT_IN_SINF:
25505 case BUILT_IN_COSF:
25506 case BUILT_IN_EXPF:
25507 case BUILT_IN_POWF:
25508 case BUILT_IN_LOGF:
25509 case BUILT_IN_LOG2F:
25510 case BUILT_IN_LOG10F:
25513 if (el_mode != SFmode
25522 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25523 sprintf (name + 7, "%s", bname+10);
25525 arity = 0;
25526 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25527      args = TREE_CHAIN (args))
25528   arity++;
25529
25530 if (arity == 1)
25531   fntype = build_function_type_list (type_out, type_in, NULL);
25532 else
25533   fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25535 /* Build a function declaration for the vectorized function. */
25536 new_fndecl = build_decl (BUILTINS_LOCATION,
25537 FUNCTION_DECL, get_identifier (name), fntype);
25538 TREE_PUBLIC (new_fndecl) = 1;
25539 DECL_EXTERNAL (new_fndecl) = 1;
25540 DECL_IS_NOVOPS (new_fndecl) = 1;
25541 TREE_READONLY (new_fndecl) = 1;
25542
25543 return new_fndecl;
25544 }
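/* For illustration, filling in the "__vr.._" template above yields ACML
   entry points such as (again factor 2 for double, 4 for float):

     sin  -> __vrd2_sin    sinf -> __vrs4_sinf
     cos  -> __vrd2_cos    cosf -> __vrs4_cosf

   These are selected by -mveclibabi=acml.  */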
25547 /* Returns a decl of a function that implements conversion of an integer vector
25548 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25549 are the types involved when converting according to CODE.
25550 Return NULL_TREE if it is not available. */
25553 ix86_vectorize_builtin_conversion (unsigned int code,
25554 tree dest_type, tree src_type)
25562 switch (TYPE_MODE (src_type))
25565 switch (TYPE_MODE (dest_type))
25568 return (TYPE_UNSIGNED (src_type)
25569 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25570 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25572 return (TYPE_UNSIGNED (src_type)
25574 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25580 switch (TYPE_MODE (dest_type))
25583 return (TYPE_UNSIGNED (src_type)
25585 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25594 case FIX_TRUNC_EXPR:
25595 switch (TYPE_MODE (dest_type))
25598 switch (TYPE_MODE (src_type))
25601 return (TYPE_UNSIGNED (dest_type)
25603 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25605 return (TYPE_UNSIGNED (dest_type)
25607 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25614 switch (TYPE_MODE (src_type))
25617 return (TYPE_UNSIGNED (dest_type)
25619 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
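/* For illustration, with -ftree-vectorize a truncating float-to-int loop
   such as

     for (i = 0; i < n; i++)
       out[i] = (int) in[i];

   asks for FIX_TRUNC_EXPR with a V4SImode destination and V4SFmode
   source, and the decl returned above lets it be done with one
   cvttps2dq per four elements (assuming SSE2 is enabled).  */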
25636 /* Returns the decl of a target-specific builtin that implements the
25637    reciprocal of the function FN, or NULL_TREE if it is not available.  */
25638 static tree
25640 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25641 bool sqrt ATTRIBUTE_UNUSED)
25643 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25644 && flag_finite_math_only && !flag_trapping_math
25645 && flag_unsafe_math_optimizations))
25649 /* Machine dependent builtins. */
25652 /* Vectorized version of sqrt to rsqrt conversion. */
25653 case IX86_BUILTIN_SQRTPS_NR:
25654 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25660 /* Normal builtins. */
25663 /* Sqrt to rsqrt conversion. */
25664 case BUILT_IN_SQRTF:
25665 return ix86_builtins[IX86_BUILTIN_RSQRTF];
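/* For illustration, under -ffast-math (which implies the conditions
   tested above) and -mrecip, a scalar kernel such as

     float rsqrt (float x) { return 1.0f / sqrtf (x); }

   can be expanded as rsqrtss plus one Newton-Raphson refinement step
   instead of sqrtss followed by divss, trading a little precision for
   latency.  */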
25672 /* Helper for avx_vpermilps256_operand et al. This is also used by
25673 the expansion functions to turn the parallel back into a mask.
25674 The return value is 0 for no match and the imm8+1 for a match. */
25677 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25679 unsigned i, nelt = GET_MODE_NUNITS (mode);
25681 unsigned char ipar[8];
25683 if (XVECLEN (par, 0) != (int) nelt)
25686 /* Validate that all of the elements are constants, and not totally
25687 out of range. Copy the data into an integral array to make the
25688 subsequent checks easier. */
25689 for (i = 0; i < nelt; ++i)
25691 rtx er = XVECEXP (par, 0, i);
25692 unsigned HOST_WIDE_INT ei;
25694 if (!CONST_INT_P (er))
25705 /* In the 256-bit DFmode case, we can only move elements within
25706    a 128-bit lane.  */
25707 for (i = 0; i < 2; ++i)
25711 mask |= ipar[i] << i;
25713 for (i = 2; i < 4; ++i)
25717 mask |= (ipar[i] - 2) << i;
25722 /* In the 256-bit SFmode case, we have full freedom of movement
25723 within the low 128-bit lane, but the high 128-bit lane must
25724 mirror the exact same pattern. */
25725 for (i = 0; i < 4; ++i)
25726 if (ipar[i] + 4 != ipar[i + 4])
25733 /* In the 128-bit case, we have full freedom in the placement of
25734    the elements from the source operand.  */
25735 for (i = 0; i < nelt; ++i)
25736 mask |= ipar[i] << (i * (nelt / 2));
25740 gcc_unreachable ();
25743 /* Make sure success has a nonzero value by adding one.  */
25744 return mask + 1;
25745 }
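/* For illustration, in the V4SFmode case a parallel selecting elements
   (1 0 3 2) gives

     mask = 1 << 0 | 0 << 2 | 3 << 4 | 2 << 6 = 0xb1

   i.e. the vpermilps imm8 that swaps adjacent pairs, and the function
   returns 0xb1 + 1 = 0xb2.  */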
25747 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25748 the expansion functions to turn the parallel back into a mask.
25749 The return value is 0 for no match and the imm8+1 for a match. */
25752 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25754 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25756 unsigned char ipar[8];
25758 if (XVECLEN (par, 0) != (int) nelt)
25761 /* Validate that all of the elements are constants, and not totally
25762 out of range. Copy the data into an integral array to make the
25763 subsequent checks easier. */
25764 for (i = 0; i < nelt; ++i)
25766 rtx er = XVECEXP (par, 0, i);
25767 unsigned HOST_WIDE_INT ei;
25769 if (!CONST_INT_P (er))
25772 if (ei >= 2 * nelt)
25777 /* Validate that each half of the permute selects consecutive elements, so the halves really are halves.  */
25778 for (i = 0; i < nelt2 - 1; ++i)
25779 if (ipar[i] + 1 != ipar[i + 1])
25781 for (i = nelt2; i < nelt - 1; ++i)
25782 if (ipar[i] + 1 != ipar[i + 1])
25785 /* Reconstruct the mask. */
25786 for (i = 0; i < 2; ++i)
25788 unsigned e = ipar[i * nelt2];
25789 if (e % nelt2)
25790   return 0;
25791 e /= nelt2;
25792 mask |= e << (i * 4);
25795 /* Make sure success has a nonzero value by adding one.  */
25796 return mask + 1;
25797 }
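/* For illustration, in the V4DFmode case a parallel selecting elements
   (2 3 4 5) of the concatenated operands, i.e. the high lane of the
   first source and the low lane of the second, gives

     mask = (2 / 2) << 0 | (4 / 2) << 4 = 0x21

   the vperm2f128 imm8, and the function returns 0x21 + 1 = 0x22.  */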
25800 /* Store OPERAND to memory after reload is completed.  This means
25801    that we can't easily use assign_stack_local.  */
25802 static rtx
25803 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25807 gcc_assert (reload_completed);
25808 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25810 result = gen_rtx_MEM (mode,
25811 gen_rtx_PLUS (Pmode,
25813 GEN_INT (-RED_ZONE_SIZE)));
25814 emit_move_insn (result, operand);
25816 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25822 operand = gen_lowpart (DImode, operand);
25826 gen_rtx_SET (VOIDmode,
25827 gen_rtx_MEM (DImode,
25828 gen_rtx_PRE_DEC (DImode,
25829 stack_pointer_rtx)),
25833 gcc_unreachable ();
25835 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25844 split_di (&operand, 1, operands, operands + 1);
25846 gen_rtx_SET (VOIDmode,
25847 gen_rtx_MEM (SImode,
25848 gen_rtx_PRE_DEC (Pmode,
25849 stack_pointer_rtx)),
25852 gen_rtx_SET (VOIDmode,
25853 gen_rtx_MEM (SImode,
25854 gen_rtx_PRE_DEC (Pmode,
25855 stack_pointer_rtx)),
25860 /* Store HImodes as SImodes. */
25861 operand = gen_lowpart (SImode, operand);
25865 gen_rtx_SET (VOIDmode,
25866 gen_rtx_MEM (GET_MODE (operand),
25867 gen_rtx_PRE_DEC (SImode,
25868 stack_pointer_rtx)),
25872 gcc_unreachable ();
25874 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25879 /* Free operand from the memory. */
25881 ix86_free_from_memory (enum machine_mode mode)
25883 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25887 if (mode == DImode || TARGET_64BIT)
25891 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
25892    to a pop or add instruction if registers are available.  */
25893 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25894 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25899 /* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
25900    SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same.  */
25902 static const reg_class_t *
25903 i386_ira_cover_classes (void)
25905 static const reg_class_t sse_fpmath_classes[] = {
25906 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25908 static const reg_class_t no_sse_fpmath_classes[] = {
25909 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25912 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25915 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25916 QImode must go into class Q_REGS.
25917 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25918 movdf to do mem-to-mem moves through integer regs. */
25920 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25922 enum machine_mode mode = GET_MODE (x);
25924 /* We're only allowed to return a subclass of CLASS. Many of the
25925 following checks fail for NO_REGS, so eliminate that early. */
25926 if (regclass == NO_REGS)
25929 /* All classes can load zeros. */
25930 if (x == CONST0_RTX (mode))
25933 /* Force constants into memory if we are loading a (nonzero) constant into
25934 an MMX or SSE register. This is because there are no MMX/SSE instructions
25935 to load from a constant. */
25937 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25940 /* Prefer SSE regs only, if we can use them for math. */
25941 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25942 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25944 /* Floating-point constants need more complex checks. */
25945 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25947 /* General regs can load everything. */
25948 if (reg_class_subset_p (regclass, GENERAL_REGS))
25951 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25952 zero above. We only want to wind up preferring 80387 registers if
25953 we plan on doing computation with them. */
25955 && standard_80387_constant_p (x))
25957 /* Limit class to non-sse. */
25958 if (regclass == FLOAT_SSE_REGS)
25960 if (regclass == FP_TOP_SSE_REGS)
25962 if (regclass == FP_SECOND_SSE_REGS)
25963 return FP_SECOND_REG;
25964 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25971 /* Generally when we see PLUS here, it's the function invariant
25972    (plus soft-fp const_int), which can only be computed into general
25973    registers.  */
25974 if (GET_CODE (x) == PLUS)
25975 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25977 /* QImode constants are easy to load, but non-constant QImode data
25978 must go into Q_REGS. */
25979 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25981 if (reg_class_subset_p (regclass, Q_REGS))
25983 if (reg_class_subset_p (Q_REGS, regclass))
25991 /* Discourage putting floating-point values in SSE registers unless
25992 SSE math is being used, and likewise for the 387 registers. */
25994 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25996 enum machine_mode mode = GET_MODE (x);
25998 /* Restrict the output reload class to the register bank that we are doing
25999    math on.  If we would otherwise return a class that is not a subset of
26000    CLASS, reject this alternative by returning NO_REGS: if reload cannot
26001    use it, it will still fall back to its own choice.  */
26002 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26003 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26005 if (X87_FLOAT_MODE_P (mode))
26007 if (regclass == FP_TOP_SSE_REGS)
26009 else if (regclass == FP_SECOND_SSE_REGS)
26010 return FP_SECOND_REG;
26012 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26019 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26020 enum machine_mode mode,
26021 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26023 /* QImode spills from non-QI registers require an
26024    intermediate register on 32-bit targets.  */
26025 if (!in_p && mode == QImode && !TARGET_64BIT
26026 && (rclass == GENERAL_REGS
26027 || rclass == LEGACY_REGS
26028 || rclass == INDEX_REGS))
26037 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26038 regno = true_regnum (x);
26040 /* Return Q_REGS if the operand is in memory. */
26048 /* If we are copying between general and FP registers, we need a memory
26049 location. The same is true for SSE and MMX registers.
26051    To optimize register_move_cost performance, allow the inline variant.
26052
26053    The macro can't work reliably when one of the CLASSES is a class
26054    containing registers from multiple units (SSE, MMX, integer).  We avoid
26055    this by never combining those units in a single alternative in the
26056    machine description.  Ensure that this constraint holds to avoid
26057    unexpected surprises.
26058 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26059 enforce these sanity checks. */
26062 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26063 enum machine_mode mode, int strict)
26065 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26066 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26067 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26068 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26069 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26070 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26072 gcc_assert (!strict);
26076 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26079 /* ??? This is a lie. We do have moves between mmx/general, and for
26080 mmx/sse2. But by saying we need secondary memory we discourage the
26081 register allocator from using the mmx registers unless needed. */
26082 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26085 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26087 /* SSE1 doesn't have any direct moves from other classes. */
26091 /* If the target says that inter-unit moves are more expensive
26092 than moving through memory, then don't generate them. */
26093 if (!TARGET_INTER_UNIT_MOVES)
26096 /* Between SSE and general, we have moves no larger than word size. */
26097 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26105 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26106 enum machine_mode mode, int strict)
26108 return inline_secondary_memory_needed (class1, class2, mode, strict);
26111 /* Return true if the registers in CLASS cannot represent the change from
26112 modes FROM to TO. */
26115 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26116 enum reg_class regclass)
26121 /* x87 registers can't do subreg at all, as all values are reformatted
26122 to extended precision. */
26123 if (MAYBE_FLOAT_CLASS_P (regclass))
26126 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26128 /* Vector registers do not support QI or HImode loads. If we don't
26129 disallow a change to these modes, reload will assume it's ok to
26130 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26131 the vec_dupv4hi pattern. */
26132 if (GET_MODE_SIZE (from) < 4)
26135 /* Vector registers do not support subreg with nonzero offsets, which
26136 are otherwise valid for integer registers. Since we can't see
26137 whether we have a nonzero offset from here, prohibit all
26138 nonparadoxical subregs changing size. */
26139 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26146 /* Return the cost of moving data of mode M between a
26147 register and memory. A value of 2 is the default; this cost is
26148 relative to those in `REGISTER_MOVE_COST'.
26150    This function is used extensively by register_move_cost, which is used
26151    to build tables at startup.  Make it inline in this case.
26152    When IN is 2, return the maximum of the in and out move costs.
26153
26154    If moving between registers and memory is more expensive than
26155    between two registers, you should define this macro to express
26156    the relative cost.
26157
26158    Also model the increased cost of moving QImode registers in
26159    non-Q_REGS classes.  */
26160 static inline int
26161
26162 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26163                          int in)
26164 {
26165   int cost;
26166 if (FLOAT_CLASS_P (regclass))
26183 if (in == 2)
26184   return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26185 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26187 if (SSE_CLASS_P (regclass))
26190 switch (GET_MODE_SIZE (mode))
26204 if (in == 2)
26205   return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26206 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26208 if (MMX_CLASS_P (regclass))
26211 switch (GET_MODE_SIZE (mode))
26222 if (in == 2)
26223   return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26224 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26226 switch (GET_MODE_SIZE (mode))
26229 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26230   {
26231     if (!in)
26232       return ix86_cost->int_store[0];
26233     if (TARGET_PARTIAL_REG_DEPENDENCY
26234         && optimize_function_for_speed_p (cfun))
26235       cost = ix86_cost->movzbl_load;
26236     else
26237       cost = ix86_cost->int_load[0];
26238     if (in == 2)
26239       return MAX (cost, ix86_cost->int_store[0]);
26240     return cost;
26241   }
26242 else
26243   {
26244     if (in == 2)
26245       return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26246     if (in)
26247       return ix86_cost->movzbl_load;
26248     else
26249       return ix86_cost->int_store[0] + 4;
26250   }
26251
26252 case 2:
26253   if (in == 2)
26254     return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26255   return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26256 default:
26257   /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
26258   if (mode == TFmode)
26259     mode = XFmode;
26260   if (in == 2)
26261     cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]);
26262   else if (in)
26263     cost = ix86_cost->int_load[2];
26264   else
26265     cost = ix86_cost->int_store[2];
26266 return (cost * (((int) GET_MODE_SIZE (mode)
26267 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26272 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26275 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26279 /* Return the cost of moving data from a register in class CLASS1 to
26280 one in class CLASS2.
26282 It is not required that the cost always equal 2 when FROM is the same as TO;
26283 on some machines it is expensive to move between registers if they are not
26284 general registers. */
26287 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26288 reg_class_t class2_i)
26290 enum reg_class class1 = (enum reg_class) class1_i;
26291 enum reg_class class2 = (enum reg_class) class2_i;
26293 /* In case we require secondary memory, compute cost of the store followed
26294 by load. In order to avoid bad register allocation choices, we need
26295 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26297 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26301 cost += inline_memory_move_cost (mode, class1, 2);
26302 cost += inline_memory_move_cost (mode, class2, 2);
26304 /* When copying from a general-purpose register we may emit multiple
26305    stores followed by a single load, causing a memory-size-mismatch
26306    stall.  Count this as an arbitrarily high cost of 20.  */
26307 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26310 /* In the case of FP/MMX moves, the registers actually overlap, and we
26311 have to switch modes in order to treat them differently. */
26312 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26313 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26319 /* Moves between SSE/MMX and integer unit are expensive. */
26320 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26321 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26323 /* ??? By keeping the returned value relatively high, we limit the number
26324    of moves between integer and MMX/SSE registers for all targets.
26325    Additionally, a high value prevents a problem with x86_modes_tieable_p (),
26326    where integer modes in MMX/SSE registers are not tieable
26327    because of missing QImode and HImode moves to, from and between
26328    MMX/SSE registers.  */
26329 return MAX (8, ix86_cost->mmxsse_to_integer);
26331 if (MAYBE_FLOAT_CLASS_P (class1))
26332 return ix86_cost->fp_move;
26333 if (MAYBE_SSE_CLASS_P (class1))
26334 return ix86_cost->sse_move;
26335 if (MAYBE_MMX_CLASS_P (class1))
26336 return ix86_cost->mmx_move;
26340 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26343 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26345 /* The flags register, and only the flags register, can hold CCmode values.  */
26346 if (CC_REGNO_P (regno))
26347 return GET_MODE_CLASS (mode) == MODE_CC;
26348 if (GET_MODE_CLASS (mode) == MODE_CC
26349 || GET_MODE_CLASS (mode) == MODE_RANDOM
26350 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26352 if (FP_REGNO_P (regno))
26353 return VALID_FP_MODE_P (mode);
26354 if (SSE_REGNO_P (regno))
26356 /* We implement the move patterns for all vector modes into and
26357    out of SSE registers, even when no operation instructions
26358    are available.  OImode moves are available only when AVX is
26359    enabled.  */
26360 return ((TARGET_AVX && mode == OImode)
26361 || VALID_AVX256_REG_MODE (mode)
26362 || VALID_SSE_REG_MODE (mode)
26363 || VALID_SSE2_REG_MODE (mode)
26364 || VALID_MMX_REG_MODE (mode)
26365 || VALID_MMX_REG_MODE_3DNOW (mode));
26367 if (MMX_REGNO_P (regno))
26369 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26370 so if the register is available at all, then we can move data of
26371 the given mode into or out of it. */
26372 return (VALID_MMX_REG_MODE (mode)
26373 || VALID_MMX_REG_MODE_3DNOW (mode));
26376 if (mode == QImode)
26378 /* Take care with QImode values: they can be placed in non-QI regs,
26379    but they may then cause partial register stalls.  */
26380 if (regno <= BX_REG || TARGET_64BIT)
26381   return 1;
26382 if (!TARGET_PARTIAL_REG_STALL)
26383   return 1;
26384 return reload_in_progress || reload_completed;
26386 /* We handle both integers and floats in the general purpose registers.  */
26387 else if (VALID_INT_MODE_P (mode))
26389 else if (VALID_FP_MODE_P (mode))
26391 else if (VALID_DFP_MODE_P (mode))
26393 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26394 on to use that value in smaller contexts, this can easily force a
26395 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26396 supporting DImode, allow it. */
26397 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26403 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26404 tieable integer mode. */
26407 ix86_tieable_integer_mode_p (enum machine_mode mode)
26416 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26419 return TARGET_64BIT;
26426 /* Return true if MODE1 is accessible in a register that can hold MODE2
26427 without copying. That is, all register classes that can hold MODE2
26428 can also hold MODE1. */
26431 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26433 if (mode1 == mode2)
26436 if (ix86_tieable_integer_mode_p (mode1)
26437 && ix86_tieable_integer_mode_p (mode2))
26440 /* MODE2 being XFmode implies fp stack or general regs, which means we
26441 can tie any smaller floating point modes to it. Note that we do not
26442 tie this with TFmode. */
26443 if (mode2 == XFmode)
26444 return mode1 == SFmode || mode1 == DFmode;
26446 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26447 that we can tie it with SFmode. */
26448 if (mode2 == DFmode)
26449 return mode1 == SFmode;
26451 /* If MODE2 is only appropriate for an SSE register, then tie with
26452 any other mode acceptable to SSE registers. */
26453 if (GET_MODE_SIZE (mode2) == 16
26454 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26455 return (GET_MODE_SIZE (mode1) == 16
26456 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26458 /* If MODE2 is appropriate for an MMX register, then tie
26459 with any other mode acceptable to MMX registers. */
26460 if (GET_MODE_SIZE (mode2) == 8
26461 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26462 return (GET_MODE_SIZE (mode1) == 8
26463 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26468 /* Compute a (partial) cost for rtx X. Return true if the complete
26469 cost has been computed, and false if subexpressions should be
26470 scanned. In either case, *TOTAL contains the cost result. */
26473 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26475 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26476 enum machine_mode mode = GET_MODE (x);
26477 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26485 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26487 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26489 else if (flag_pic && SYMBOLIC_CONST (x)
26491     || (GET_CODE (x) != LABEL_REF
26492 && (GET_CODE (x) != SYMBOL_REF
26493 || !SYMBOL_REF_LOCAL_P (x)))))
26500 if (mode == VOIDmode)
26503 switch (standard_80387_constant_p (x))
26508 default: /* Other constants */
26513 /* Start with (MEM (SYMBOL_REF)), since that's where
26514 it'll probably end up. Add a penalty for size. */
26515 *total = (COSTS_N_INSNS (1)
26516 + (flag_pic != 0 && !TARGET_64BIT)
26517 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26523 /* The zero extension is often completely free on x86_64, so make
26524    it as cheap as possible.  */
26525 if (TARGET_64BIT && mode == DImode
26526 && GET_MODE (XEXP (x, 0)) == SImode)
26528 else if (TARGET_ZERO_EXTEND_WITH_AND)
26529 *total = cost->add;
26531 *total = cost->movzx;
26535 *total = cost->movsx;
26539 if (CONST_INT_P (XEXP (x, 1))
26540 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26542 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26545 *total = cost->add;
26548 if ((value == 2 || value == 3)
26549 && cost->lea <= cost->shift_const)
26551 *total = cost->lea;
26561 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26563 if (CONST_INT_P (XEXP (x, 1)))
26565 if (INTVAL (XEXP (x, 1)) > 32)
26566 *total = cost->shift_const + COSTS_N_INSNS (2);
26568 *total = cost->shift_const * 2;
26572 if (GET_CODE (XEXP (x, 1)) == AND)
26573 *total = cost->shift_var * 2;
26575 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26580 if (CONST_INT_P (XEXP (x, 1)))
26581 *total = cost->shift_const;
26583 *total = cost->shift_var;
26588 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26590 /* ??? SSE scalar cost should be used here. */
26591 *total = cost->fmul;
26594 else if (X87_FLOAT_MODE_P (mode))
26596 *total = cost->fmul;
26599 else if (FLOAT_MODE_P (mode))
26601 /* ??? SSE vector cost should be used here. */
26602 *total = cost->fmul;
26607 rtx op0 = XEXP (x, 0);
26608 rtx op1 = XEXP (x, 1);
26610 if (CONST_INT_P (XEXP (x, 1)))
26612 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26613 for (nbits = 0; value != 0; value &= value - 1)
26617 /* This is arbitrary. */
26620 /* Compute costs correctly for widening multiplication. */
26621 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26622 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26623 == GET_MODE_SIZE (mode))
26625 int is_mulwiden = 0;
26626 enum machine_mode inner_mode = GET_MODE (op0);
26628 if (GET_CODE (op0) == GET_CODE (op1))
26629 is_mulwiden = 1, op1 = XEXP (op1, 0);
26630 else if (CONST_INT_P (op1))
26632 if (GET_CODE (op0) == SIGN_EXTEND)
26633 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26636 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26640 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26643 *total = (cost->mult_init[MODE_INDEX (mode)]
26644 + nbits * cost->mult_bit
26645 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26654 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26655 /* ??? SSE cost should be used here. */
26656 *total = cost->fdiv;
26657 else if (X87_FLOAT_MODE_P (mode))
26658 *total = cost->fdiv;
26659 else if (FLOAT_MODE_P (mode))
26660 /* ??? SSE vector cost should be used here. */
26661 *total = cost->fdiv;
26663 *total = cost->divide[MODE_INDEX (mode)];
26667 if (GET_MODE_CLASS (mode) == MODE_INT
26668 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26670 if (GET_CODE (XEXP (x, 0)) == PLUS
26671 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26672 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26673 && CONSTANT_P (XEXP (x, 1)))
26675 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26676 if (val == 2 || val == 4 || val == 8)
26678 *total = cost->lea;
26679 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26680 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26681 outer_code, speed);
26682 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26686 else if (GET_CODE (XEXP (x, 0)) == MULT
26687 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26689 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26690 if (val == 2 || val == 4 || val == 8)
26692 *total = cost->lea;
26693 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26694 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26698 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26700 *total = cost->lea;
26701 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26702 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26703 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
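/* For illustration, an address-like computation such as

     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))

   is matched above and costed as a single lea, e.g.
   "leal 12(%ebx,%ecx,4), %eax" (registers illustrative).  */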
26710 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26712 /* ??? SSE cost should be used here. */
26713 *total = cost->fadd;
26716 else if (X87_FLOAT_MODE_P (mode))
26718 *total = cost->fadd;
26721 else if (FLOAT_MODE_P (mode))
26723 /* ??? SSE vector cost should be used here. */
26724 *total = cost->fadd;
26732 if (!TARGET_64BIT && mode == DImode)
26734 *total = (cost->add * 2
26735 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26736 << (GET_MODE (XEXP (x, 0)) != DImode))
26737 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26738 << (GET_MODE (XEXP (x, 1)) != DImode)));
26744 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26746 /* ??? SSE cost should be used here. */
26747 *total = cost->fchs;
26750 else if (X87_FLOAT_MODE_P (mode))
26752 *total = cost->fchs;
26755 else if (FLOAT_MODE_P (mode))
26757 /* ??? SSE vector cost should be used here. */
26758 *total = cost->fchs;
26764 if (!TARGET_64BIT && mode == DImode)
26765 *total = cost->add * 2;
26767 *total = cost->add;
26771 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26772 && XEXP (XEXP (x, 0), 1) == const1_rtx
26773 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26774 && XEXP (x, 1) == const0_rtx)
26776 /* This kind of construct is implemented using test[bwl].
26777 Treat it as if we had an AND. */
26778 *total = (cost->add
26779 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26780 + rtx_cost (const1_rtx, outer_code, speed));
26786 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26791 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26792 /* ??? SSE cost should be used here. */
26793 *total = cost->fabs;
26794 else if (X87_FLOAT_MODE_P (mode))
26795 *total = cost->fabs;
26796 else if (FLOAT_MODE_P (mode))
26797 /* ??? SSE vector cost should be used here. */
26798 *total = cost->fabs;
26802 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26803 /* ??? SSE cost should be used here. */
26804 *total = cost->fsqrt;
26805 else if (X87_FLOAT_MODE_P (mode))
26806 *total = cost->fsqrt;
26807 else if (FLOAT_MODE_P (mode))
26808 /* ??? SSE vector cost should be used here. */
26809 *total = cost->fsqrt;
26813 if (XINT (x, 1) == UNSPEC_TP)
26820 case VEC_DUPLICATE:
26821 /* ??? Assume all of these vector manipulation patterns are
26822    recognizable, in which case they all pretty much have the
26823    same cost.  */
26824 *total = COSTS_N_INSNS (1);
26834 static int current_machopic_label_num;
26836 /* Given a symbol name and its associated stub, write out the
26837 definition of the stub. */
26840 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26842 unsigned int length;
26843 char *binder_name, *symbol_name, lazy_ptr_name[32];
26844 int label = ++current_machopic_label_num;
26846 /* For 64-bit we shouldn't get here. */
26847 gcc_assert (!TARGET_64BIT);
26849 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26850 symb = targetm.strip_name_encoding (symb);
26852 length = strlen (stub);
26853 binder_name = XALLOCAVEC (char, length + 32);
26854 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26856 length = strlen (symb);
26857 symbol_name = XALLOCAVEC (char, length + 32);
26858 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26860 sprintf (lazy_ptr_name, "L%d$lz", label);
26863 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26865 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26867 fprintf (file, "%s:\n", stub);
26868 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26872 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26873 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26874 fprintf (file, "\tjmp\t*%%edx\n");
26877 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26879 fprintf (file, "%s:\n", binder_name);
26883 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26884 fputs ("\tpushl\t%eax\n", file);
26887 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26889 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26891 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26892 fprintf (file, "%s:\n", lazy_ptr_name);
26893 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26894 fprintf (file, ASM_LONG "%s\n", binder_name);
26896 #endif /* TARGET_MACHO */
26898 /* Order the registers for register allocator. */
26901 x86_order_regs_for_local_alloc (void)
26906 /* First allocate the local general purpose registers. */
26907 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26908 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26909 reg_alloc_order [pos++] = i;
26911 /* Global general purpose registers. */
26912 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26913 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26914 reg_alloc_order [pos++] = i;
26916 /* x87 registers come first in case we are doing FP math
26917    using them.  */
26918 if (!TARGET_SSE_MATH)
26919 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26920 reg_alloc_order [pos++] = i;
26922 /* SSE registers. */
26923 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26924 reg_alloc_order [pos++] = i;
26925 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26926 reg_alloc_order [pos++] = i;
26928 /* x87 registers. */
26929 if (TARGET_SSE_MATH)
26930 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26931 reg_alloc_order [pos++] = i;
26933 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26934 reg_alloc_order [pos++] = i;
26936 /* Initialize the rest of the array, as we do not allocate some
26937    registers at all.  */
26938 while (pos < FIRST_PSEUDO_REGISTER)
26939 reg_alloc_order [pos++] = 0;
26942 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
26943    struct attribute_spec.handler.  */
26945 ix86_handle_abi_attribute (tree *node, tree name,
26946 tree args ATTRIBUTE_UNUSED,
26947 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26949 if (TREE_CODE (*node) != FUNCTION_TYPE
26950 && TREE_CODE (*node) != METHOD_TYPE
26951 && TREE_CODE (*node) != FIELD_DECL
26952 && TREE_CODE (*node) != TYPE_DECL)
26954 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26956 *no_add_attrs = true;
26961 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26963 *no_add_attrs = true;
26967 /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
26968 if (is_attribute_p ("ms_abi", name))
26970 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26972 error ("ms_abi and sysv_abi attributes are not compatible");
26977 else if (is_attribute_p ("sysv_abi", name))
26979 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26981 error ("ms_abi and sysv_abi attributes are not compatible");
26990 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26991 struct attribute_spec.handler. */
26993 ix86_handle_struct_attribute (tree *node, tree name,
26994 tree args ATTRIBUTE_UNUSED,
26995 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26998 if (DECL_P (*node))
27000 if (TREE_CODE (*node) == TYPE_DECL)
27001 type = &TREE_TYPE (*node);
27006 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27007 || TREE_CODE (*type) == UNION_TYPE)))
27009 warning (OPT_Wattributes, "%qE attribute ignored",
27011 *no_add_attrs = true;
27014 else if ((is_attribute_p ("ms_struct", name)
27015 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27016 || ((is_attribute_p ("gcc_struct", name)
27017 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27019 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27021 *no_add_attrs = true;
27028 ix86_handle_fndecl_attribute (tree *node, tree name,
27029 tree args ATTRIBUTE_UNUSED,
27030 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27032 if (TREE_CODE (*node) != FUNCTION_DECL)
27034 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27036 *no_add_attrs = true;
27040 #ifndef HAVE_AS_IX86_SWAP
27042 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
27049 ix86_ms_bitfield_layout_p (const_tree record_type)
27051 return (TARGET_MS_BITFIELD_LAYOUT &&
27052 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27053 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27056 /* Returns an expression indicating where the this parameter is
27057 located on entry to the FUNCTION. */
27060 x86_this_parameter (tree function)
27062 tree type = TREE_TYPE (function);
27063 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27068 const int *parm_regs;
27070 if (ix86_function_type_abi (type) == MS_ABI)
27071 parm_regs = x86_64_ms_abi_int_parameter_registers;
27073 parm_regs = x86_64_int_parameter_registers;
27074 return gen_rtx_REG (DImode, parm_regs[aggr]);
27077 nregs = ix86_function_regparm (type, function);
27079 if (nregs > 0 && !stdarg_p (type))
27083 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27084 regno = aggr ? DX_REG : CX_REG;
27085 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27089 return gen_rtx_MEM (SImode,
27090 plus_constant (stack_pointer_rtx, 4));
27099 return gen_rtx_MEM (SImode,
27100 plus_constant (stack_pointer_rtx, 4));
27103 return gen_rtx_REG (SImode, regno);
27106 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27109 /* Determine whether x86_output_mi_thunk can succeed. */
27112 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27113 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27114 HOST_WIDE_INT vcall_offset, const_tree function)
27116 /* 64-bit can handle anything. */
27120 /* For 32-bit, everything's fine if we have one free register. */
27121 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27124 /* Need a free register for vcall_offset. */
27128 /* Need a free register for GOT references. */
27129 if (flag_pic && !targetm.binds_local_p (function))
27132 /* Otherwise ok. */
27136 /* Output the assembler code for a thunk function. THUNK_DECL is the
27137 declaration for the thunk function itself, FUNCTION is the decl for
27138 the target function. DELTA is an immediate constant offset to be
27139 added to THIS. If VCALL_OFFSET is nonzero, the word at
27140 *(*this + vcall_offset) should be added to THIS. */
27143 x86_output_mi_thunk (FILE *file,
27144 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27145 HOST_WIDE_INT vcall_offset, tree function)
27148 rtx this_param = x86_this_parameter (function);
27151 /* Make sure unwind info is emitted for the thunk if needed. */
27152 final_start_function (emit_barrier (), file, 1);
27154 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27155 pull it in now and let DELTA benefit. */
27156 if (REG_P (this_param))
27157 this_reg = this_param;
27158 else if (vcall_offset)
27160 /* Put the this parameter into %eax. */
27161 xops[0] = this_param;
27162 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27163 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27166 this_reg = NULL_RTX;
27168 /* Adjust the this parameter by a fixed constant. */
27171 xops[0] = GEN_INT (delta);
27172 xops[1] = this_reg ? this_reg : this_param;
27175 if (!x86_64_general_operand (xops[0], DImode))
27177 tmp = gen_rtx_REG (DImode, R10_REG);
27179 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27181 xops[1] = this_param;
27183 if (x86_maybe_negate_const_int (&xops[0], DImode))
27184 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27186 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27188 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27189 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27191 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27194 /* Adjust the this parameter by a value stored in the vtable. */
27198 tmp = gen_rtx_REG (DImode, R10_REG);
27201 int tmp_regno = CX_REG;
27202 if (lookup_attribute ("fastcall",
27203 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27204 || lookup_attribute ("thiscall",
27205 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27206 tmp_regno = AX_REG;
27207 tmp = gen_rtx_REG (SImode, tmp_regno);
27210 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27212 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27214 /* Adjust the this parameter. */
27215 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27216 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27218 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27219 xops[0] = GEN_INT (vcall_offset);
27221 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27222 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27224 xops[1] = this_reg;
27225 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27228 /* If necessary, drop THIS back to its stack slot. */
27229 if (this_reg && this_reg != this_param)
27231 xops[0] = this_reg;
27232 xops[1] = this_param;
27233 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27236 xops[0] = XEXP (DECL_RTL (function), 0);
27239 if (!flag_pic || targetm.binds_local_p (function))
27240 output_asm_insn ("jmp\t%P0", xops);
27241 /* All thunks should be in the same object as their target,
27242 and thus binds_local_p should be true. */
27243 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27244 gcc_unreachable ();
27247 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27248 tmp = gen_rtx_CONST (Pmode, tmp);
27249 tmp = gen_rtx_MEM (QImode, tmp);
27251 output_asm_insn ("jmp\t%A0", xops);
27256 if (!flag_pic || targetm.binds_local_p (function))
27257 output_asm_insn ("jmp\t%P0", xops);
27262 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27263 if (TARGET_MACHO_BRANCH_ISLANDS)
27264 sym_ref = (gen_rtx_SYMBOL_REF
27266 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27267 tmp = gen_rtx_MEM (QImode, sym_ref);
27269 output_asm_insn ("jmp\t%0", xops);
27272 #endif /* TARGET_MACHO */
27274 tmp = gen_rtx_REG (SImode, CX_REG);
27275 output_set_got (tmp, NULL_RTX);
27278 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27279 output_asm_insn ("jmp\t{*}%1", xops);
27282 final_end_function ();
27286 x86_file_start (void)
27288 default_file_start ();
27290 darwin_file_start ();
27292 if (X86_FILE_START_VERSION_DIRECTIVE)
27293 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27294 if (X86_FILE_START_FLTUSED)
27295 fputs ("\t.global\t__fltused\n", asm_out_file);
27296 if (ix86_asm_dialect == ASM_INTEL)
27297 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27301 x86_field_alignment (tree field, int computed)
27303 enum machine_mode mode;
27304 tree type = TREE_TYPE (field);
27306 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27307   return computed;
27308 mode = TYPE_MODE (strip_array_types (type));
27309 if (mode == DFmode || mode == DCmode
27310 || GET_MODE_CLASS (mode) == MODE_INT
27311 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27312 return MIN (32, computed);
27316 /* Output assembler code to FILE to increment profiler label # LABELNO
27317 for profiling a function entry. */
27319 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27321 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27326 #ifndef NO_PROFILE_COUNTERS
27327 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27330 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27331 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27333 fprintf (file, "\tcall\t%s\n", mcount_name);
27337 #ifndef NO_PROFILE_COUNTERS
27338 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27341 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27345 #ifndef NO_PROFILE_COUNTERS
27346 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27349 fprintf (file, "\tcall\t%s\n", mcount_name);
27353 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27354 /* We don't have exact information about the insn sizes, but we may assume
27355    quite safely that we are informed about all 1-byte insns and memory
27356    address sizes.  This is enough to eliminate unnecessary padding in
27357    the vast majority of cases.  */
27358 static int
27360 min_insn_size (rtx insn)
27364 if (!INSN_P (insn) || !active_insn_p (insn))
27367 /* Discard alignment directives we have emitted, and jump table data.  */
27368 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27369 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27371 if (JUMP_TABLE_DATA_P (insn))
27374 /* Important case - calls are always 5 bytes.
27375    It is common to have many calls in a row.  */
27377 && symbolic_reference_mentioned_p (PATTERN (insn))
27378 && !SIBLING_CALL_P (insn))
27380 len = get_attr_length (insn);
27384 /* For normal instructions we rely on get_attr_length being exact,
27385 with a few exceptions. */
27386 if (!JUMP_P (insn))
27388 enum attr_type type = get_attr_type (insn);
27393 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27394 || asm_noperands (PATTERN (insn)) >= 0)
27401 /* Otherwise trust get_attr_length. */
27405 l = get_attr_length_address (insn);
27406 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27415 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27416    16-byte window.  */
27417 static void
27419 ix86_avoid_jump_mispredicts (void)
27421 rtx insn, start = get_insns ();
27422 int nbytes = 0, njumps = 0;
27425 /* Look for all minimal intervals of instructions containing 4 jumps.
27426    The intervals are bounded by START and INSN.  NBYTES is the total
27427    size of the instructions in the interval, including INSN and not
27428    including START.  When NBYTES is smaller than 16, it is possible
27429    that START and INSN end up in the same 16-byte page.
27430
27431    The smallest offset at which INSN can start is the case where START
27432    ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
27433    We add a p2align to the 16-byte window with maxskip
27434    15 - NBYTES + sizeof (INSN).  */
27435 for (insn = start; insn; insn = NEXT_INSN (insn))
27439 if (LABEL_P (insn))
27441 int align = label_to_alignment (insn);
27442 int max_skip = label_to_max_skip (insn);
27446 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27447 already in the current 16 byte page, because otherwise
27448 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27449 bytes to reach 16 byte boundary. */
27451 || (align <= 3 && max_skip != (1 << align) - 1))
27454 fprintf (dump_file, "Label %i with max_skip %i\n",
27455 INSN_UID (insn), max_skip);
27458 while (nbytes + max_skip >= 16)
27460 start = NEXT_INSN (start);
27461 if ((JUMP_P (start)
27462 && GET_CODE (PATTERN (start)) != ADDR_VEC
27463 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27465 njumps--, isjump = 1;
27468 nbytes -= min_insn_size (start);
27474 min_size = min_insn_size (insn);
27475 nbytes += min_size;
27477 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27478 INSN_UID (insn), min_size);
27480 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27481 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27489 start = NEXT_INSN (start);
27490 if ((JUMP_P (start)
27491 && GET_CODE (PATTERN (start)) != ADDR_VEC
27492 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27494 njumps--, isjump = 1;
27497 nbytes -= min_insn_size (start);
27499 gcc_assert (njumps >= 0);
27501 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27502 INSN_UID (start), INSN_UID (insn), nbytes);
27504 if (njumps == 3 && isjump && nbytes < 16)
27506 int padsize = 15 - nbytes + min_insn_size (insn);
27509 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27510 INSN_UID (insn), padsize);
27511 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27517 /* AMD Athlon works faster
27518 when RET is not the destination of a conditional jump or directly preceded
27519 by another jump instruction. We avoid the penalty by inserting a NOP just
27520 before the RET instructions in such cases. */
27522 ix86_pad_returns (void)
27527 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27529 basic_block bb = e->src;
27530 rtx ret = BB_END (bb);
27532 bool replace = false;
27534 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27535 || optimize_bb_for_size_p (bb))
27537 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27538 if (active_insn_p (prev) || LABEL_P (prev))
27540 if (prev && LABEL_P (prev))
27545 FOR_EACH_EDGE (e, ei, bb->preds)
27546 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27547 && !(e->flags & EDGE_FALLTHRU))
27552 prev = prev_active_insn (ret);
27554 && ((JUMP_P (prev) && any_condjump_p (prev))
27557 /* Empty functions get a branch mispredict even when the jump destination
27558 is not visible to us. */
27559 if (!prev && !optimize_function_for_size_p (cfun))
27564 emit_jump_insn_before (gen_return_internal_long (), ret);
27570 /* Implement machine specific optimizations. We implement padding of returns
27571 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27575 if (optimize && optimize_function_for_speed_p (cfun))
27577 if (TARGET_PAD_RETURNS)
27578 ix86_pad_returns ();
27579 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27580 if (TARGET_FOUR_JUMP_LIMIT)
27581 ix86_avoid_jump_mispredicts ();
27582 #endif
27586 /* Return nonzero when a QImode register that must be represented via a REX
27587 prefix is used. */
27588 bool
27589 x86_extended_QIreg_mentioned_p (rtx insn)
27592 extract_insn_cached (insn);
27593 for (i = 0; i < recog_data.n_operands; i++)
27594 if (REG_P (recog_data.operand[i])
27595 && REGNO (recog_data.operand[i]) > BX_REG)
27600 /* Return nonzero when P points to a register encoded via a REX prefix.
27601 Called via for_each_rtx. */
27603 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27605 unsigned int regno;
27608 regno = REGNO (*p);
27609 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27612 /* Return true when INSN mentions a register that must be encoded using a REX
27613 prefix. */
27614 bool
27615 x86_extended_reg_mentioned_p (rtx insn)
27617 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27618 extended_reg_mentioned_1, NULL);
27621 /* If profitable, negate (without causing overflow) the integer constant
27622 of mode MODE at location LOC. Return true in this case. */
27623 bool
27624 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27628 if (!CONST_INT_P (*loc))
27634 /* DImode x86_64 constants must fit in 32 bits. */
27635 gcc_assert (x86_64_immediate_operand (*loc, mode));
27646 gcc_unreachable ();
27649 /* Avoid overflows. */
27650 if (mode_signbit_p (mode, *loc))
27653 val = INTVAL (*loc);
27655 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
27656 Exceptions: -128 encodes smaller than 128, so swap the sign and operation. */
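/* For example, `addl $-4,%eax' becomes `subl $4,%eax'.  The -128
   exception exists because -128 still fits in a sign-extended 8-bit
   immediate while +128 does not, so `subl $128' would encode larger
   than `addl $-128'.  (Illustrative note.) */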
27657 if ((val < 0 && val != -128)
27660 *loc = GEN_INT (-val);
27667 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27668 optabs would emit if we didn't have TFmode patterns. */
27671 x86_emit_floatuns (rtx operands[2])
27673 rtx neglab, donelab, i0, i1, f0, in, out;
27674 enum machine_mode mode, inmode;
27676 inmode = GET_MODE (operands[1]);
27677 gcc_assert (inmode == SImode || inmode == DImode);
27680 in = force_reg (inmode, operands[1]);
27681 mode = GET_MODE (out);
27682 neglab = gen_label_rtx ();
27683 donelab = gen_label_rtx ();
27684 f0 = gen_reg_rtx (mode);
27686 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27688 expand_float (out, in, 0);
27690 emit_jump_insn (gen_jump (donelab));
27693 emit_label (neglab);
27695 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27697 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27699 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27701 expand_float (f0, i0, 0);
27703 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27705 emit_label (donelab);
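/* A standalone C sketch of the sequence emitted above, for a DImode
   input (illustrative only; `u64_to_double' is a hypothetical name,
   not part of GCC):

     double u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       double f = (double) (long long) half;
       return f + f;
     }

   Halving with the lost bit ORed back in keeps the rounding of the
   final result correct; the add doubles the value back up. */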
27708 /* AVX does not support 32-byte integer vector operations,
27709 thus the longest vector we are faced with is V16QImode. */
27710 #define MAX_VECT_LEN 16
27712 struct expand_vec_perm_d
27714 rtx target, op0, op1;
27715 unsigned char perm[MAX_VECT_LEN];
27716 enum machine_mode vmode;
27717 unsigned char nelt;
27721 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27722 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27724 /* Get a vector mode of the same size as the original but with elements
27725 twice as wide. This is only guaranteed to apply to integral vectors. */
27727 static inline enum machine_mode
27728 get_mode_wider_vector (enum machine_mode o)
27730 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27731 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27732 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27733 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27737 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27738 with all elements equal to VAR. Return true if successful. */
27741 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27742 rtx target, rtx val)
27765 /* First attempt to recognize VAL as-is. */
27766 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27767 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27768 if (recog_memoized (insn) < 0)
27771 /* If that fails, force VAL into a register. */
27774 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27775 seq = get_insns ();
27778 emit_insn_before (seq, insn);
27780 ok = recog_memoized (insn) >= 0;
27789 if (TARGET_SSE || TARGET_3DNOW_A)
27793 val = gen_lowpart (SImode, val);
27794 x = gen_rtx_TRUNCATE (HImode, val);
27795 x = gen_rtx_VEC_DUPLICATE (mode, x);
27796 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27809 struct expand_vec_perm_d dperm;
27813 memset (&dperm, 0, sizeof (dperm));
27814 dperm.target = target;
27815 dperm.vmode = mode;
27816 dperm.nelt = GET_MODE_NUNITS (mode);
27817 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27819 /* Extend to SImode using a paradoxical SUBREG. */
27820 tmp1 = gen_reg_rtx (SImode);
27821 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27823 /* Insert the SImode value as low element of a V4SImode vector. */
27824 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27825 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27827 ok = (expand_vec_perm_1 (&dperm)
27828 || expand_vec_perm_broadcast_1 (&dperm));
27840 /* Replicate the value once into the next wider mode and recurse. */
27842 enum machine_mode smode, wsmode, wvmode;
27845 smode = GET_MODE_INNER (mode);
27846 wvmode = get_mode_wider_vector (mode);
27847 wsmode = GET_MODE_INNER (wvmode);
27849 val = convert_modes (wsmode, smode, val, true);
27850 x = expand_simple_binop (wsmode, ASHIFT, val,
27851 GEN_INT (GET_MODE_BITSIZE (smode)),
27852 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27853 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27855 x = gen_lowpart (wvmode, target);
27856 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
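/* E.g. to broadcast a QImode value X into V16QImode, this first forms
   the HImode value (X << 8) | X, broadcasts it into V8HImode, and
   finally reinterprets the result as V16QImode via gen_lowpart.
   (Illustrative note.) */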
27864 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
27865 rtx x = gen_reg_rtx (hvmode);
27867 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
27870 x = gen_rtx_VEC_CONCAT (mode, x, x);
27871 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27880 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27881 whose ONE_VAR element is VAR, and the other elements are zero. Return true
27882 if successful. */
27884 static bool
27885 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27886 rtx target, rtx var, int one_var)
27888 enum machine_mode vsimode;
27891 bool use_vector_set = false;
27896 /* For SSE4.1, we normally use vector set. But if the second
27897 element is zero and inter-unit moves are OK, we use movq
27898 instead. */
27899 use_vector_set = (TARGET_64BIT
27901 && !(TARGET_INTER_UNIT_MOVES
27907 use_vector_set = TARGET_SSE4_1;
27910 use_vector_set = TARGET_SSE2;
27913 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27920 use_vector_set = TARGET_AVX;
27923 /* Use ix86_expand_vector_set in 64bit mode only. */
27924 use_vector_set = TARGET_AVX && TARGET_64BIT;
27930 if (use_vector_set)
27932 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27933 var = force_reg (GET_MODE_INNER (mode), var);
27934 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27950 var = force_reg (GET_MODE_INNER (mode), var);
27951 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27952 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27957 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27958 new_target = gen_reg_rtx (mode);
27960 new_target = target;
27961 var = force_reg (GET_MODE_INNER (mode), var);
27962 x = gen_rtx_VEC_DUPLICATE (mode, var);
27963 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27964 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27967 /* We need to shuffle the value to the correct position, so
27968 create a new pseudo to store the intermediate result. */
27970 /* With SSE2, we can use the integer shuffle insns. */
27971 if (mode != V4SFmode && TARGET_SSE2)
27973 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27975 GEN_INT (one_var == 1 ? 0 : 1),
27976 GEN_INT (one_var == 2 ? 0 : 1),
27977 GEN_INT (one_var == 3 ? 0 : 1)));
27978 if (target != new_target)
27979 emit_move_insn (target, new_target);
27983 /* Otherwise convert the intermediate result to V4SFmode and
27984 use the SSE1 shuffle instructions. */
27985 if (mode != V4SFmode)
27987 tmp = gen_reg_rtx (V4SFmode);
27988 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27993 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27995 GEN_INT (one_var == 1 ? 0 : 1),
27996 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27997 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27999 if (mode != V4SFmode)
28000 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28001 else if (tmp != target)
28002 emit_move_insn (target, tmp);
28004 else if (target != new_target)
28005 emit_move_insn (target, new_target);
28010 vsimode = V4SImode;
28016 vsimode = V2SImode;
28022 /* Zero extend the variable element to SImode and recurse. */
28023 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28025 x = gen_reg_rtx (vsimode);
28026 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28028 gcc_unreachable ();
28030 emit_move_insn (target, gen_lowpart (mode, x));
28038 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28039 consisting of the values in VALS. It is known that all elements
28040 except ONE_VAR are constants. Return true if successful. */
28043 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28044 rtx target, rtx vals, int one_var)
28046 rtx var = XVECEXP (vals, 0, one_var);
28047 enum machine_mode wmode;
28050 const_vec = copy_rtx (vals);
28051 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28052 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28060 /* For the two element vectors, it's just as easy to use
28061 the general case. */
28065 /* Use ix86_expand_vector_set in 64bit mode only. */
28088 /* There's no way to set one QImode entry easily. Combine
28089 the variable value with its adjacent constant value, and
28090 promote to an HImode set. */
28091 x = XVECEXP (vals, 0, one_var ^ 1);
28094 var = convert_modes (HImode, QImode, var, true);
28095 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28096 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28097 x = GEN_INT (INTVAL (x) & 0xff);
28101 var = convert_modes (HImode, QImode, var, true);
28102 x = gen_int_mode (INTVAL (x) << 8, HImode);
28104 if (x != const0_rtx)
28105 var = expand_simple_binop (HImode, IOR, var, x, var,
28106 1, OPTAB_LIB_WIDEN);
28108 x = gen_reg_rtx (wmode);
28109 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28110 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28112 emit_move_insn (target, gen_lowpart (mode, x));
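/* E.g. with mode == V16QImode and one_var == 5, the variable byte is
   paired with the constant byte at index 4 into the HImode value
   (var << 8) | (const & 0xff), which is then inserted as element
   5 >> 1 == 2 of the V8HImode view of the vector.  (Illustrative
   note.) */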
28119 emit_move_insn (target, const_vec);
28120 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28124 /* A subroutine of ix86_expand_vector_init_general. Use vector
28125 concatenate to handle the most general case: all values variable,
28126 and none identical. */
28129 ix86_expand_vector_init_concat (enum machine_mode mode,
28130 rtx target, rtx *ops, int n)
28132 enum machine_mode cmode, hmode = VOIDmode;
28133 rtx first[8], second[4];
28173 gcc_unreachable ();
28176 if (!register_operand (ops[1], cmode))
28177 ops[1] = force_reg (cmode, ops[1]);
28178 if (!register_operand (ops[0], cmode))
28179 ops[0] = force_reg (cmode, ops[0]);
28180 emit_insn (gen_rtx_SET (VOIDmode, target,
28181 gen_rtx_VEC_CONCAT (mode, ops[0],
28201 gcc_unreachable ();
28217 gcc_unreachable ();
28222 /* FIXME: We process inputs backward to help RA. PR 36222. */
28225 for (; i > 0; i -= 2, j--)
28227 first[j] = gen_reg_rtx (cmode);
28228 v = gen_rtvec (2, ops[i - 1], ops[i]);
28229 ix86_expand_vector_init (false, first[j],
28230 gen_rtx_PARALLEL (cmode, v));
28236 gcc_assert (hmode != VOIDmode);
28237 for (i = j = 0; i < n; i += 2, j++)
28239 second[j] = gen_reg_rtx (hmode);
28240 ix86_expand_vector_init_concat (hmode, second [j],
28244 ix86_expand_vector_init_concat (mode, target, second, n);
28247 ix86_expand_vector_init_concat (mode, target, first, n);
28251 gcc_unreachable ();
28255 /* A subroutine of ix86_expand_vector_init_general. Use vector
28256 interleave to handle the most general case: all values variable,
28257 and none identical. */
28260 ix86_expand_vector_init_interleave (enum machine_mode mode,
28261 rtx target, rtx *ops, int n)
28263 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28266 rtx (*gen_load_even) (rtx, rtx, rtx);
28267 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28268 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28273 gen_load_even = gen_vec_setv8hi;
28274 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28275 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28276 inner_mode = HImode;
28277 first_imode = V4SImode;
28278 second_imode = V2DImode;
28279 third_imode = VOIDmode;
28282 gen_load_even = gen_vec_setv16qi;
28283 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28284 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28285 inner_mode = QImode;
28286 first_imode = V8HImode;
28287 second_imode = V4SImode;
28288 third_imode = V2DImode;
28291 gcc_unreachable ();
28294 for (i = 0; i < n; i++)
28296 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28297 op0 = gen_reg_rtx (SImode);
28298 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28300 /* Insert the SImode value as low element of V4SImode vector. */
28301 op1 = gen_reg_rtx (V4SImode);
28302 op0 = gen_rtx_VEC_MERGE (V4SImode,
28303 gen_rtx_VEC_DUPLICATE (V4SImode,
28305 CONST0_RTX (V4SImode),
28307 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28309 /* Cast the V4SImode vector back to a vector in the original mode. */
28310 op0 = gen_reg_rtx (mode);
28311 emit_move_insn (op0, gen_lowpart (mode, op1));
28313 /* Load even elements into the second position. */
28314 emit_insn (gen_load_even (op0,
28315 force_reg (inner_mode,
28319 /* Cast vector to FIRST_IMODE vector. */
28320 ops[i] = gen_reg_rtx (first_imode);
28321 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28324 /* Interleave low FIRST_IMODE vectors. */
28325 for (i = j = 0; i < n; i += 2, j++)
28327 op0 = gen_reg_rtx (first_imode);
28328 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28330 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28331 ops[j] = gen_reg_rtx (second_imode);
28332 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28335 /* Interleave low SECOND_IMODE vectors. */
28336 switch (second_imode)
28339 for (i = j = 0; i < n / 2; i += 2, j++)
28341 op0 = gen_reg_rtx (second_imode);
28342 emit_insn (gen_interleave_second_low (op0, ops[i],
28345 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28346 vector. */
28347 ops[j] = gen_reg_rtx (third_imode);
28348 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28350 second_imode = V2DImode;
28351 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28355 op0 = gen_reg_rtx (second_imode);
28356 emit_insn (gen_interleave_second_low (op0, ops[0],
28359 /* Cast the SECOND_IMODE vector back to a vector in the original
28360 mode. */
28361 emit_insn (gen_rtx_SET (VOIDmode, target,
28362 gen_lowpart (mode, op0)));
28366 gcc_unreachable ();
28370 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28371 all values variable, and none identical. */
28374 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28375 rtx target, rtx vals)
28377 rtx ops[32], op0, op1;
28378 enum machine_mode half_mode = VOIDmode;
28385 if (!mmx_ok && !TARGET_SSE)
28397 n = GET_MODE_NUNITS (mode);
28398 for (i = 0; i < n; i++)
28399 ops[i] = XVECEXP (vals, 0, i);
28400 ix86_expand_vector_init_concat (mode, target, ops, n);
28404 half_mode = V16QImode;
28408 half_mode = V8HImode;
28412 n = GET_MODE_NUNITS (mode);
28413 for (i = 0; i < n; i++)
28414 ops[i] = XVECEXP (vals, 0, i);
28415 op0 = gen_reg_rtx (half_mode);
28416 op1 = gen_reg_rtx (half_mode);
28417 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28419 ix86_expand_vector_init_interleave (half_mode, op1,
28420 &ops [n >> 1], n >> 2);
28421 emit_insn (gen_rtx_SET (VOIDmode, target,
28422 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28426 if (!TARGET_SSE4_1)
28434 /* Don't use ix86_expand_vector_init_interleave if we can't
28435 move from GPR to SSE register directly. */
28436 if (!TARGET_INTER_UNIT_MOVES)
28439 n = GET_MODE_NUNITS (mode);
28440 for (i = 0; i < n; i++)
28441 ops[i] = XVECEXP (vals, 0, i);
28442 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28450 gcc_unreachable ();
28454 int i, j, n_elts, n_words, n_elt_per_word;
28455 enum machine_mode inner_mode;
28456 rtx words[4], shift;
28458 inner_mode = GET_MODE_INNER (mode);
28459 n_elts = GET_MODE_NUNITS (mode);
28460 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28461 n_elt_per_word = n_elts / n_words;
28462 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
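/* The loop below packs N_ELT_PER_WORD elements into each word,
   consuming the elements of a word from the highest index downwards
   and shifting the accumulated word left each time, so the
   lowest-indexed element lands in the least significant bits - the
   layout expected when the words are reinterpreted as the vector mode
   on a little-endian target.  E.g. for V4HImode with 32-bit words
   (illustrative note):

     words[0] = (elt[1] << 16) | elt[0];
     words[1] = (elt[3] << 16) | elt[2];  */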
28464 for (i = 0; i < n_words; ++i)
28466 rtx word = NULL_RTX;
28468 for (j = 0; j < n_elt_per_word; ++j)
28470 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28471 elt = convert_modes (word_mode, inner_mode, elt, true);
28477 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28478 word, 1, OPTAB_LIB_WIDEN);
28479 word = expand_simple_binop (word_mode, IOR, word, elt,
28480 word, 1, OPTAB_LIB_WIDEN);
28488 emit_move_insn (target, gen_lowpart (mode, words[0]));
28489 else if (n_words == 2)
28491 rtx tmp = gen_reg_rtx (mode);
28492 emit_clobber (tmp);
28493 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28494 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28495 emit_move_insn (target, tmp);
28497 else if (n_words == 4)
28499 rtx tmp = gen_reg_rtx (V4SImode);
28500 gcc_assert (word_mode == SImode);
28501 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28502 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28503 emit_move_insn (target, gen_lowpart (mode, tmp));
28506 gcc_unreachable ();
28510 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28511 instructions unless MMX_OK is true. */
28514 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28516 enum machine_mode mode = GET_MODE (target);
28517 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28518 int n_elts = GET_MODE_NUNITS (mode);
28519 int n_var = 0, one_var = -1;
28520 bool all_same = true, all_const_zero = true;
28524 for (i = 0; i < n_elts; ++i)
28526 x = XVECEXP (vals, 0, i);
28527 if (!(CONST_INT_P (x)
28528 || GET_CODE (x) == CONST_DOUBLE
28529 || GET_CODE (x) == CONST_FIXED))
28530 n_var++, one_var = i;
28531 else if (x != CONST0_RTX (inner_mode))
28532 all_const_zero = false;
28533 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28537 /* Constants are best loaded from the constant pool. */
28540 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28544 /* If all values are identical, broadcast the value. */
28546 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28547 XVECEXP (vals, 0, 0)))
28550 /* Values where only one field is non-constant are best loaded from
28551 the pool and overwritten via move later. */
28555 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28556 XVECEXP (vals, 0, one_var),
28560 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28564 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28568 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28570 enum machine_mode mode = GET_MODE (target);
28571 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28572 enum machine_mode half_mode;
28573 bool use_vec_merge = false;
28575 static rtx (*gen_extract[6][2]) (rtx, rtx)
28577 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28578 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28579 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28580 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28581 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28582 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28584 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28586 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28587 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28588 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28589 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28590 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28591 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28601 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28602 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28604 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28606 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28607 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28613 use_vec_merge = TARGET_SSE4_1;
28621 /* For the two element vectors, we implement a VEC_CONCAT with
28622 the extraction of the other element. */
28624 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28625 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28628 op0 = val, op1 = tmp;
28630 op0 = tmp, op1 = val;
28632 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28633 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28638 use_vec_merge = TARGET_SSE4_1;
28645 use_vec_merge = true;
28649 /* tmp = target = A B C D */
28650 tmp = copy_to_reg (target);
28651 /* target = A A B B */
28652 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28653 /* target = X A B B */
28654 ix86_expand_vector_set (false, target, val, 0);
28655 /* target = A X C D */
28656 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28657 const1_rtx, const0_rtx,
28658 GEN_INT (2+4), GEN_INT (3+4)));
28662 /* tmp = target = A B C D */
28663 tmp = copy_to_reg (target);
28664 /* tmp = X B C D */
28665 ix86_expand_vector_set (false, tmp, val, 0);
28666 /* target = A B X D */
28667 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28668 const0_rtx, const1_rtx,
28669 GEN_INT (0+4), GEN_INT (3+4)));
28673 /* tmp = target = A B C D */
28674 tmp = copy_to_reg (target);
28675 /* tmp = X B C D */
28676 ix86_expand_vector_set (false, tmp, val, 0);
28677 /* target = A B X D */
28678 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28679 const0_rtx, const1_rtx,
28680 GEN_INT (2+4), GEN_INT (0+4)));
28684 gcc_unreachable ();
28689 use_vec_merge = TARGET_SSE4_1;
28693 /* Element 0 handled by vec_merge below. */
28696 use_vec_merge = true;
28702 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28703 store into element 0, then shuffle them back. */
28707 order[0] = GEN_INT (elt);
28708 order[1] = const1_rtx;
28709 order[2] = const2_rtx;
28710 order[3] = GEN_INT (3);
28711 order[elt] = const0_rtx;
28713 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28714 order[1], order[2], order[3]));
28716 ix86_expand_vector_set (false, target, val, 0);
28718 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28719 order[1], order[2], order[3]));
28723 /* For SSE1, we have to reuse the V4SF code. */
28724 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28725 gen_lowpart (SFmode, val), elt);
28730 use_vec_merge = TARGET_SSE2;
28733 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28737 use_vec_merge = TARGET_SSE4_1;
28744 half_mode = V16QImode;
28750 half_mode = V8HImode;
28756 half_mode = V4SImode;
28762 half_mode = V2DImode;
28768 half_mode = V4SFmode;
28774 half_mode = V2DFmode;
28780 /* Compute offset. */
28784 gcc_assert (i <= 1);
28786 /* Extract the half. */
28787 tmp = gen_reg_rtx (half_mode);
28788 emit_insn (gen_extract[j][i] (tmp, target));
28790 /* Put val in tmp at elt. */
28791 ix86_expand_vector_set (false, tmp, val, elt);
28794 emit_insn (gen_insert[j][i] (target, target, tmp));
28803 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28804 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28805 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28809 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28811 emit_move_insn (mem, target);
28813 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28814 emit_move_insn (tmp, val);
28816 emit_move_insn (target, mem);
28821 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28823 enum machine_mode mode = GET_MODE (vec);
28824 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28825 bool use_vec_extr = false;
28838 use_vec_extr = true;
28842 use_vec_extr = TARGET_SSE4_1;
28854 tmp = gen_reg_rtx (mode);
28855 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28856 GEN_INT (elt), GEN_INT (elt),
28857 GEN_INT (elt+4), GEN_INT (elt+4)));
28861 tmp = gen_reg_rtx (mode);
28862 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
28866 gcc_unreachable ();
28869 use_vec_extr = true;
28874 use_vec_extr = TARGET_SSE4_1;
28888 tmp = gen_reg_rtx (mode);
28889 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28890 GEN_INT (elt), GEN_INT (elt),
28891 GEN_INT (elt), GEN_INT (elt)));
28895 tmp = gen_reg_rtx (mode);
28896 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28900 gcc_unreachable ();
28903 use_vec_extr = true;
28908 /* For SSE1, we have to reuse the V4SF code. */
28909 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28910 gen_lowpart (V4SFmode, vec), elt);
28916 use_vec_extr = TARGET_SSE2;
28919 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28923 use_vec_extr = TARGET_SSE4_1;
28927 /* ??? Could extract the appropriate HImode element and shift. */
28934 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28935 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28937 /* Let the rtl optimizers know about the zero extension performed. */
28938 if (inner_mode == QImode || inner_mode == HImode)
28940 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28941 target = gen_lowpart (SImode, target);
28944 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28948 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28950 emit_move_insn (mem, vec);
28952 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28953 emit_move_insn (target, tmp);
28957 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28958 pattern to reduce; DEST is the destination; IN is the input vector. */
28961 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28963 rtx tmp1, tmp2, tmp3;
28965 tmp1 = gen_reg_rtx (V4SFmode);
28966 tmp2 = gen_reg_rtx (V4SFmode);
28967 tmp3 = gen_reg_rtx (V4SFmode);
28969 emit_insn (gen_sse_movhlps (tmp1, in, in));
28970 emit_insn (fn (tmp2, tmp1, in));
28972 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28973 const1_rtx, const1_rtx,
28974 GEN_INT (1+4), GEN_INT (1+4)));
28975 emit_insn (fn (dest, tmp2, tmp3));
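/* Lane by lane, the sequence above computes (illustrative note,
   assuming FN is an element-wise binary operation such as min or max):

     tmp1 = { in[2], in[3], undef, undef }   movhlps
     tmp2 = fn (tmp1, in)                    tmp2[0] = fn (in[2], in[0])
                                             tmp2[1] = fn (in[3], in[1])
     tmp3 = { tmp2[1], tmp2[1], ... }        shufps
     dest = fn (tmp2, tmp3)                  dest[0] = fn over all four

   Only element 0 of DEST holds the reduction result. */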
28978 /* Target hook for scalar_mode_supported_p. */
28980 ix86_scalar_mode_supported_p (enum machine_mode mode)
28982 if (DECIMAL_FLOAT_MODE_P (mode))
28983 return default_decimal_float_supported_p ();
28984 else if (mode == TFmode)
28987 return default_scalar_mode_supported_p (mode);
28990 /* Implements target hook vector_mode_supported_p. */
28992 ix86_vector_mode_supported_p (enum machine_mode mode)
28994 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28996 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28998 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29000 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29002 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29007 /* Target hook for c_mode_for_suffix. */
29008 static enum machine_mode
29009 ix86_c_mode_for_suffix (char suffix)
29019 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29021 We do this in the new i386 backend to maintain source compatibility
29022 with the old cc0-based compiler. */
29025 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29026 tree inputs ATTRIBUTE_UNUSED,
29029 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29031 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29036 /* Implements the target hook targetm.asm.encode_section_info. This
29037 is not used by NetWare. */
29039 static void ATTRIBUTE_UNUSED
29040 ix86_encode_section_info (tree decl, rtx rtl, int first)
29042 default_encode_section_info (decl, rtl, first);
29044 if (TREE_CODE (decl) == VAR_DECL
29045 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29046 && ix86_in_large_data_p (decl))
29047 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29050 /* Worker function for REVERSE_CONDITION. */
29053 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29055 return (mode != CCFPmode && mode != CCFPUmode
29056 ? reverse_condition (code)
29057 : reverse_condition_maybe_unordered (code));
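/* E.g. reversing LT in CCFPmode must yield UNGE rather than GE: with a
   NaN operand the original LT is false, so the reversed condition must
   be true, and only the unordered-or-greater-equal test is.
   (Illustrative note.) */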
29060 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29061 to OPERANDS[0]. */
29063 const char *
29064 output_387_reg_move (rtx insn, rtx *operands)
29066 if (REG_P (operands[0]))
29068 if (REG_P (operands[1])
29069 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29071 if (REGNO (operands[0]) == FIRST_STACK_REG)
29072 return output_387_ffreep (operands, 0);
29073 return "fstp\t%y0";
29075 if (STACK_TOP_P (operands[0]))
29076 return "fld%Z1\t%y1";
29079 else if (MEM_P (operands[0]))
29081 gcc_assert (REG_P (operands[1]));
29082 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29083 return "fstp%Z0\t%y0";
29086 /* There is no non-popping store to memory for XFmode.
29087 So if we need one, follow the store with a load. */
29088 if (GET_MODE (operands[0]) == XFmode)
29089 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29091 return "fst%Z0\t%y0";
29098 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29099 the FP status register is set. */
29102 ix86_emit_fp_unordered_jump (rtx label)
29104 rtx reg = gen_reg_rtx (HImode);
29107 emit_insn (gen_x86_fnstsw_1 (reg));
29109 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29111 emit_insn (gen_x86_sahf_1 (reg));
29113 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29114 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29118 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29120 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29121 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29124 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29125 gen_rtx_LABEL_REF (VOIDmode, label),
29127 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29129 emit_jump_insn (temp);
29130 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29133 /* Output code to perform a log1p XFmode calculation. */
29135 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29137 rtx label1 = gen_label_rtx ();
29138 rtx label2 = gen_label_rtx ();
29140 rtx tmp = gen_reg_rtx (XFmode);
29141 rtx tmp2 = gen_reg_rtx (XFmode);
29144 emit_insn (gen_absxf2 (tmp, op1));
29145 test = gen_rtx_GE (VOIDmode, tmp,
29146 CONST_DOUBLE_FROM_REAL_VALUE (
29147 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29149 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29151 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29152 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29153 emit_jump (label2);
29155 emit_label (label1);
29156 emit_move_insn (tmp, CONST1_RTX (XFmode));
29157 emit_insn (gen_addxf3 (tmp, op1, tmp));
29158 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29159 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29161 emit_label (label2);
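/* The threshold 0.29289321881345... used above is 1 - sqrt(2)/2, the
   bound within which the x87 fyl2xp1 instruction is specified to be
   accurate; for larger |op1| the code falls back to computing
   log1p (x) as ln (2) * log2 (1 + x) via fyl2x.  (Illustrative
   note.) */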
29164 /* Output code to perform a Newton-Raphson approximation of a single precision
29165 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29167 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29169 rtx x0, x1, e0, e1, two;
29171 x0 = gen_reg_rtx (mode);
29172 e0 = gen_reg_rtx (mode);
29173 e1 = gen_reg_rtx (mode);
29174 x1 = gen_reg_rtx (mode);
29176 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29178 if (VECTOR_MODE_P (mode))
29179 two = ix86_build_const_vector (SFmode, true, two);
29181 two = force_reg (mode, two);
29183 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29185 /* x0 = rcp(b) estimate */
29186 emit_insn (gen_rtx_SET (VOIDmode, x0,
29187 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29190 emit_insn (gen_rtx_SET (VOIDmode, e0,
29191 gen_rtx_MULT (mode, x0, a)));
29193 emit_insn (gen_rtx_SET (VOIDmode, e1,
29194 gen_rtx_MULT (mode, x0, b)));
29196 emit_insn (gen_rtx_SET (VOIDmode, x1,
29197 gen_rtx_MINUS (mode, two, e1)));
29198 /* res = e0 * x1 */
29199 emit_insn (gen_rtx_SET (VOIDmode, res,
29200 gen_rtx_MULT (mode, e0, x1)));
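/* A scalar C sketch of the step above (illustrative only;
   `rcp_estimate' stands in for the roughly 12-bit accurate rcpss
   result, and one Newton-Raphson iteration roughly doubles the number
   of correct bits):

     float x0 = rcp_estimate (b);
     float res = (a * x0) * (2.0f - b * x0);

   which approximates a / b without a divide instruction. */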
29203 /* Output code to perform a Newton-Raphson approximation of a
29204 single precision floating point [reciprocal] square root. */
29206 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29209 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29212 x0 = gen_reg_rtx (mode);
29213 e0 = gen_reg_rtx (mode);
29214 e1 = gen_reg_rtx (mode);
29215 e2 = gen_reg_rtx (mode);
29216 e3 = gen_reg_rtx (mode);
29218 real_from_integer (&r, VOIDmode, -3, -1, 0);
29219 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29221 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29222 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29224 if (VECTOR_MODE_P (mode))
29226 mthree = ix86_build_const_vector (SFmode, true, mthree);
29227 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29230 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29231 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29233 /* x0 = rsqrt(a) estimate */
29234 emit_insn (gen_rtx_SET (VOIDmode, x0,
29235 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29238 /* If a == 0.0, filter out the infinite result to prevent NaN for sqrt (0.0). */
29243 zero = gen_reg_rtx (mode);
29244 mask = gen_reg_rtx (mode);
29246 zero = force_reg (mode, CONST0_RTX(mode));
29247 emit_insn (gen_rtx_SET (VOIDmode, mask,
29248 gen_rtx_NE (mode, zero, a)));
29250 emit_insn (gen_rtx_SET (VOIDmode, x0,
29251 gen_rtx_AND (mode, x0, mask)));
29255 emit_insn (gen_rtx_SET (VOIDmode, e0,
29256 gen_rtx_MULT (mode, x0, a)));
29258 emit_insn (gen_rtx_SET (VOIDmode, e1,
29259 gen_rtx_MULT (mode, e0, x0)));
29262 mthree = force_reg (mode, mthree);
29263 emit_insn (gen_rtx_SET (VOIDmode, e2,
29264 gen_rtx_PLUS (mode, e1, mthree)));
29266 mhalf = force_reg (mode, mhalf);
29268 /* e3 = -.5 * x0 */
29269 emit_insn (gen_rtx_SET (VOIDmode, e3,
29270 gen_rtx_MULT (mode, x0, mhalf)));
29272 /* e3 = -.5 * e0 */
29273 emit_insn (gen_rtx_SET (VOIDmode, e3,
29274 gen_rtx_MULT (mode, e0, mhalf)));
29275 /* ret = e2 * e3 */
29276 emit_insn (gen_rtx_SET (VOIDmode, res,
29277 gen_rtx_MULT (mode, e2, e3)));
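/* In scalar form, the step above is (illustrative note): with
   x0 = rsqrt_estimate (a),

     rsqrt (a) ~= -0.5 * x0 * (a * x0 * x0 - 3.0)
     sqrt (a)  ~= -0.5 * (a * x0) * (a * x0 * x0 - 3.0)

   i.e. the standard Newton-Raphson iteration
   x1 = 0.5 * x0 * (3 - a * x0 * x0), with the signs folded into the
   -0.5 and -3.0 constants loaded above. */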
29280 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29282 static void ATTRIBUTE_UNUSED
29283 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29286 /* With Binutils 2.15, the "@unwind" marker must be specified on
29287 every occurrence of the ".eh_frame" section, not just the first
29288 one. */
29289 if (TARGET_64BIT
29290 && strcmp (name, ".eh_frame") == 0)
29292 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29293 flags & SECTION_WRITE ? "aw" : "a");
29296 default_elf_asm_named_section (name, flags, decl);
29299 /* Return the mangling of TYPE if it is an extended fundamental type. */
29301 static const char *
29302 ix86_mangle_type (const_tree type)
29304 type = TYPE_MAIN_VARIANT (type);
29306 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29307 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29310 switch (TYPE_MODE (type))
29313 /* __float128 is "g". */
29316 /* "long double" or __float80 is "e". */
29323 /* For 32-bit code we can save PIC register setup by using
29324 __stack_chk_fail_local hidden function instead of calling
29325 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29326 register, so it is better to call __stack_chk_fail directly. */
29329 ix86_stack_protect_fail (void)
29331 return TARGET_64BIT
29332 ? default_external_stack_protect_fail ()
29333 : default_hidden_stack_protect_fail ();
29336 /* Select a format to encode pointers in exception handling data. CODE
29337 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29338 true if the symbol may be affected by dynamic relocations.
29340 ??? All x86 object file formats are capable of representing this.
29341 After all, the relocation needed is the same as for the call insn.
29342 Whether or not a particular assembler allows us to enter such, I
29343 guess we'll have to see. */
29345 asm_preferred_eh_data_format (int code, int global)
29349 int type = DW_EH_PE_sdata8;
29351 || ix86_cmodel == CM_SMALL_PIC
29352 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29353 type = DW_EH_PE_sdata4;
29354 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29356 if (ix86_cmodel == CM_SMALL
29357 || (ix86_cmodel == CM_MEDIUM && code))
29358 return DW_EH_PE_udata4;
29359 return DW_EH_PE_absptr;
29362 /* Expand copysign from SIGN to the positive value ABS_VALUE
29363 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29364 the sign bit. */
29365 static void
29366 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29368 enum machine_mode mode = GET_MODE (sign);
29369 rtx sgn = gen_reg_rtx (mode);
29370 if (mask == NULL_RTX)
29372 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29373 if (!VECTOR_MODE_P (mode))
29375 /* We need to generate a scalar mode mask in this case. */
29376 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29377 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29378 mask = gen_reg_rtx (mode);
29379 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29383 mask = gen_rtx_NOT (mode, mask);
29384 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29385 gen_rtx_AND (mode, mask, sign)));
29386 emit_insn (gen_rtx_SET (VOIDmode, result,
29387 gen_rtx_IOR (mode, abs_value, sgn)));
29390 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29391 mask for masking out the sign-bit is stored in *SMASK, if that is
29392 non-null. */
29393 static rtx
29394 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29396 enum machine_mode mode = GET_MODE (op0);
29399 xa = gen_reg_rtx (mode);
29400 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29401 if (!VECTOR_MODE_P (mode))
29403 /* We need to generate a scalar mode mask in this case. */
29404 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29405 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29406 mask = gen_reg_rtx (mode);
29407 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29409 emit_insn (gen_rtx_SET (VOIDmode, xa,
29410 gen_rtx_AND (mode, op0, mask)));
29418 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29419 swapping the operands if SWAP_OPERANDS is true. The expanded
29420 code is a forward jump to a newly created label in case the
29421 comparison is true. The generated label rtx is returned. */
29423 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29424 bool swap_operands)
29435 label = gen_label_rtx ();
29436 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29437 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29438 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29439 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29440 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29441 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29442 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29443 JUMP_LABEL (tmp) = label;
29448 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29449 using comparison code CODE. Operands are swapped for the comparison if
29450 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
29452 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29453 bool swap_operands)
29455 enum machine_mode mode = GET_MODE (op0);
29456 rtx mask = gen_reg_rtx (mode);
29465 if (mode == DFmode)
29466 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29467 gen_rtx_fmt_ee (code, mode, op0, op1)));
29469 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29470 gen_rtx_fmt_ee (code, mode, op0, op1)));
29475 /* Generate and return an rtx of mode MODE for 2**n where n is the number
29476 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29477 static rtx
29478 ix86_gen_TWO52 (enum machine_mode mode)
29480 REAL_VALUE_TYPE TWO52r;
29483 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29484 TWO52 = const_double_from_real_value (TWO52r, mode);
29485 TWO52 = force_reg (mode, TWO52);
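/* TWO52 enables rounding to integer by magic-number addition: for
   0 <= x < 2**52 the spacing of doubles at x + 2**52 is exactly 1.0,
   so (x + 2**52) - 2**52 is x rounded to an integer in the current
   rounding mode.  A scalar sketch (illustrative only; assumes the
   default round-to-nearest mode and a non-negative input):

     double round_via_two52 (double x)
     {
       const double two52 = 4503599627370496.0;
       return (x + two52) - two52;
     }
*/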
29490 /* Expand SSE sequence for computing lround from OP1 storing
29491 into OP0. */
29492 void
29493 ix86_expand_lround (rtx op0, rtx op1)
29495 /* C code for the stuff we're doing below:
29496 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29497 op0 = (long)tmp
29498 */
29499 enum machine_mode mode = GET_MODE (op1);
29500 const struct real_format *fmt;
29501 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29504 /* load nextafter (0.5, 0.0) */
29505 fmt = REAL_MODE_FORMAT (mode);
29506 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29507 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29509 /* adj = copysign (0.5, op1) */
29510 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29511 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29513 /* adj = op1 + adj */
29514 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29516 /* op0 = (imode)adj */
29517 expand_fix (op0, adj, 0);
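/* Using nextafter (0.5, 0.0) instead of 0.5 avoids misrounding at the
   halfway boundary: for op1 == 0.49999999999999994 (the largest double
   below 0.5), op1 + 0.5 rounds up to exactly 1.0 and would convert to
   1, while op1 + nextafter (0.5, 0.0) stays below 1.0 and correctly
   converts to 0.  (Illustrative note.) */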
29520 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
29521 storing into OPERAND0. */
29522 void
29523 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29525 /* C code for the stuff we're doing below (for do_floor):
29526 xi = (long)op1;
29527 xi -= (double)xi > op1 ? 1 : 0;
29528 op0 = xi;
29529 */
29530 enum machine_mode fmode = GET_MODE (op1);
29531 enum machine_mode imode = GET_MODE (op0);
29532 rtx ireg, freg, label, tmp;
29534 /* reg = (long)op1 */
29535 ireg = gen_reg_rtx (imode);
29536 expand_fix (ireg, op1, 0);
29538 /* freg = (double)reg */
29539 freg = gen_reg_rtx (fmode);
29540 expand_float (freg, ireg, 0);
29542 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29543 label = ix86_expand_sse_compare_and_jump (UNLE,
29544 freg, op1, !do_floor);
29545 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29546 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29547 emit_move_insn (ireg, tmp);
29549 emit_label (label);
29550 LABEL_NUSES (label) = 1;
29552 emit_move_insn (op0, ireg);
29555 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29556 result in OPERAND0. */
29558 ix86_expand_rint (rtx operand0, rtx operand1)
29560 /* C code for the stuff we're doing below:
29561 xa = fabs (operand1);
29562 if (!isless (xa, 2**52))
29563 return operand1;
29564 xa = xa + 2**52 - 2**52;
29565 return copysign (xa, operand1);
29566 */
29567 enum machine_mode mode = GET_MODE (operand0);
29568 rtx res, xa, label, TWO52, mask;
29570 res = gen_reg_rtx (mode);
29571 emit_move_insn (res, operand1);
29573 /* xa = abs (operand1) */
29574 xa = ix86_expand_sse_fabs (res, &mask);
29576 /* if (!isless (xa, TWO52)) goto label; */
29577 TWO52 = ix86_gen_TWO52 (mode);
29578 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29580 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29581 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29583 ix86_sse_copysign_to_positive (res, xa, res, mask);
29585 emit_label (label);
29586 LABEL_NUSES (label) = 1;
29588 emit_move_insn (operand0, res);
29591 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29594 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29596 /* C code for the stuff we expand below.
29597 double xa = fabs (x), x2;
29598 if (!isless (xa, TWO52))
29599 return x;
29600 xa = xa + TWO52 - TWO52;
29601 x2 = copysign (xa, x);
29602 Compensate. Floor:
29603 if (x2 > x)
29604 x2 -= 1;
29605 Compensate. Ceil:
29606 if (x2 < x)
29607 x2 += 1;
29608 return x2;
29609 */
29610 enum machine_mode mode = GET_MODE (operand0);
29611 rtx xa, TWO52, tmp, label, one, res, mask;
29613 TWO52 = ix86_gen_TWO52 (mode);
29615 /* Temporary for holding the result, initialized to the input
29616 operand to ease control flow. */
29617 res = gen_reg_rtx (mode);
29618 emit_move_insn (res, operand1);
29620 /* xa = abs (operand1) */
29621 xa = ix86_expand_sse_fabs (res, &mask);
29623 /* if (!isless (xa, TWO52)) goto label; */
29624 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29626 /* xa = xa + TWO52 - TWO52; */
29627 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29628 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29630 /* xa = copysign (xa, operand1) */
29631 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29633 /* generate 1.0 or -1.0 */
29634 one = force_reg (mode,
29635 const_double_from_real_value (do_floor
29636 ? dconst1 : dconstm1, mode));
29638 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29639 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29640 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29641 gen_rtx_AND (mode, one, tmp)));
29642 /* We always need to subtract here to preserve signed zero. */
29643 tmp = expand_simple_binop (mode, MINUS,
29644 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29645 emit_move_insn (res, tmp);
29647 emit_label (label);
29648 LABEL_NUSES (label) = 1;
29650 emit_move_insn (operand0, res);
29653 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29654 into OPERAND0. */
29655 void
29656 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29658 /* C code for the stuff we expand below.
29659 double xa = fabs (x), x2;
29660 if (!isless (xa, TWO52))
29661 return x;
29662 x2 = (double)(long)x;
29663 Compensate. Floor:
29664 if (x2 > x)
29665 x2 -= 1;
29666 Compensate. Ceil:
29667 if (x2 < x)
29668 x2 += 1;
29669 if (HONOR_SIGNED_ZEROS (mode))
29670 return copysign (x2, x);
29671 return x2;
29672 */
29673 enum machine_mode mode = GET_MODE (operand0);
29674 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29676 TWO52 = ix86_gen_TWO52 (mode);
29678 /* Temporary for holding the result, initialized to the input
29679 operand to ease control flow. */
29680 res = gen_reg_rtx (mode);
29681 emit_move_insn (res, operand1);
29683 /* xa = abs (operand1) */
29684 xa = ix86_expand_sse_fabs (res, &mask);
29686 /* if (!isless (xa, TWO52)) goto label; */
29687 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29689 /* xa = (double)(long)x */
29690 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29691 expand_fix (xi, res, 0);
29692 expand_float (xa, xi, 0);
29695 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29697 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29698 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29699 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29700 gen_rtx_AND (mode, one, tmp)));
29701 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29702 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29703 emit_move_insn (res, tmp);
29705 if (HONOR_SIGNED_ZEROS (mode))
29706 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29708 emit_label (label);
29709 LABEL_NUSES (label) = 1;
29711 emit_move_insn (operand0, res);
29714 /* Expand SSE sequence for computing round from OPERAND1 storing
29715 into OPERAND0. This sequence works without relying on DImode truncation
29716 via cvttsd2siq, which is only available on 64bit targets. */
29717 void
29718 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29720 /* C code for the stuff we expand below.
29721 double xa = fabs (x), xa2, x2;
29722 if (!isless (xa, TWO52))
29723 return x;
29724 Using the absolute value and copying back the sign makes
29725 -0.0 -> -0.0 correct.
29726 xa2 = xa + TWO52 - TWO52;
29727 Compensate.
29728 dxa = xa2 - xa;
29729 if (dxa <= -0.5)
29730 xa2 += 1;
29731 else if (dxa > 0.5)
29732 xa2 -= 1;
29733 x2 = copysign (xa2, x);
29734 return x2;
29735 */
29736 enum machine_mode mode = GET_MODE (operand0);
29737 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29739 TWO52 = ix86_gen_TWO52 (mode);
29741 /* Temporary for holding the result, initialized to the input
29742 operand to ease control flow. */
29743 res = gen_reg_rtx (mode);
29744 emit_move_insn (res, operand1);
29746 /* xa = abs (operand1) */
29747 xa = ix86_expand_sse_fabs (res, &mask);
29749 /* if (!isless (xa, TWO52)) goto label; */
29750 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29752 /* xa2 = xa + TWO52 - TWO52; */
29753 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29754 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29756 /* dxa = xa2 - xa; */
29757 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29759 /* generate 0.5, 1.0 and -0.5 */
29760 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29761 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29762 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29766 tmp = gen_reg_rtx (mode);
29767 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29768 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29769 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29770 gen_rtx_AND (mode, one, tmp)));
29771 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29772 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29773 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29774 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29775 gen_rtx_AND (mode, one, tmp)));
29776 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29778 /* res = copysign (xa2, operand1) */
29779 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29781 emit_label (label);
29782 LABEL_NUSES (label) = 1;
29784 emit_move_insn (operand0, res);
29787 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29788 into OPERAND0. */
29789 void
29790 ix86_expand_trunc (rtx operand0, rtx operand1)
29792 /* C code for SSE variant we expand below.
29793 double xa = fabs (x), x2;
29794 if (!isless (xa, TWO52))
29795 return x;
29796 x2 = (double)(long)x;
29797 if (HONOR_SIGNED_ZEROS (mode))
29798 return copysign (x2, x);
29799 return x2;
29800 */
29801 enum machine_mode mode = GET_MODE (operand0);
29802 rtx xa, xi, TWO52, label, res, mask;
29804 TWO52 = ix86_gen_TWO52 (mode);
29806 /* Temporary for holding the result, initialized to the input
29807 operand to ease control flow. */
29808 res = gen_reg_rtx (mode);
29809 emit_move_insn (res, operand1);
29811 /* xa = abs (operand1) */
29812 xa = ix86_expand_sse_fabs (res, &mask);
29814 /* if (!isless (xa, TWO52)) goto label; */
29815 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29817 /* x = (double)(long)x */
29818 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29819 expand_fix (xi, res, 0);
29820 expand_float (res, xi, 0);
29822 if (HONOR_SIGNED_ZEROS (mode))
29823 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29825 emit_label (label);
29826 LABEL_NUSES (label) = 1;
29828 emit_move_insn (operand0, res);
29831 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29832 into OPERAND0. */
29833 void
29834 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29836 enum machine_mode mode = GET_MODE (operand0);
29837 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29839 /* C code for SSE variant we expand below.
29840 double xa = fabs (x), x2;
29841 if (!isless (xa, TWO52))
29842 return x;
29843 xa2 = xa + TWO52 - TWO52;
29844 Compensate:
29845 if (xa2 > xa)
29846 xa2 -= 1.0;
29847 x2 = copysign (xa2, x);
29848 return x2;
29849 */
29851 TWO52 = ix86_gen_TWO52 (mode);
29853 /* Temporary for holding the result, initialized to the input
29854 operand to ease control flow. */
29855 res = gen_reg_rtx (mode);
29856 emit_move_insn (res, operand1);
29858 /* xa = abs (operand1) */
29859 xa = ix86_expand_sse_fabs (res, &smask);
29861 /* if (!isless (xa, TWO52)) goto label; */
29862 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29864 /* res = xa + TWO52 - TWO52; */
29865 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29866 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29867 emit_move_insn (res, tmp);
29870 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29872 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29873 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29874 emit_insn (gen_rtx_SET (VOIDmode, mask,
29875 gen_rtx_AND (mode, mask, one)));
29876 tmp = expand_simple_binop (mode, MINUS,
29877 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29878 emit_move_insn (res, tmp);
29880 /* res = copysign (res, operand1) */
29881 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29883 emit_label (label);
29884 LABEL_NUSES (label) = 1;
29886 emit_move_insn (operand0, res);
29889 /* Expand SSE sequence for computing round from OPERAND1 storing
29890 into OPERAND0. */
29891 void
29892 ix86_expand_round (rtx operand0, rtx operand1)
29894 /* C code for the stuff we're doing below:
29895 double xa = fabs (x);
29896 if (!isless (xa, TWO52))
29897 return x;
29898 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29899 return copysign (xa, x);
29900 */
29901 enum machine_mode mode = GET_MODE (operand0);
29902 rtx res, TWO52, xa, label, xi, half, mask;
29903 const struct real_format *fmt;
29904 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29906 /* Temporary for holding the result, initialized to the input
29907 operand to ease control flow. */
29908 res = gen_reg_rtx (mode);
29909 emit_move_insn (res, operand1);
29911 TWO52 = ix86_gen_TWO52 (mode);
29912 xa = ix86_expand_sse_fabs (res, &mask);
29913 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29915 /* load nextafter (0.5, 0.0) */
29916 fmt = REAL_MODE_FORMAT (mode);
29917 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29918 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29920 /* xa = xa + 0.5 */
29921 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29922 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29924 /* xa = (double)(int64_t)xa */
29925 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29926 expand_fix (xi, xa, 0);
29927 expand_float (xa, xi, 0);
29929 /* res = copysign (xa, operand1) */
29930 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29932 emit_label (label);
29933 LABEL_NUSES (label) = 1;
29935 emit_move_insn (operand0, res);
29939 /* Table of valid machine attributes. */
29940 static const struct attribute_spec ix86_attribute_table[] =
29942 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29943 /* Stdcall attribute says callee is responsible for popping arguments
29944 if they are not variable. */
29945 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29946 /* Fastcall attribute says callee is responsible for popping arguments
29947 if they are not variable. */
29948 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29949 /* Thiscall attribute says callee is responsible for popping arguments
29950 if they are not variable. */
29951 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29952 /* Cdecl attribute says the callee is a normal C declaration. */
29953 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29954 /* Regparm attribute specifies how many integer arguments are to be
29955 passed in registers. */
29956 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29957 /* Sseregparm attribute says we are using x86_64 calling conventions
29958 for FP arguments. */
29959 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29960 /* force_align_arg_pointer says this function realigns the stack at entry. */
29961 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29962 false, true, true, ix86_handle_cconv_attribute },
29963 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29964 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29965 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29966 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29968 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29969 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29970 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29971 SUBTARGET_ATTRIBUTE_TABLE,
29973 /* ms_abi and sysv_abi calling convention function attributes. */
29974 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29975 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29976 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29978 { NULL, 0, 0, false, false, false, NULL }
29981 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29983 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
29984 tree vectype ATTRIBUTE_UNUSED,
29985 int misalign ATTRIBUTE_UNUSED)
29987 switch (type_of_cost)
29990 return ix86_cost->scalar_stmt_cost;
29993 return ix86_cost->scalar_load_cost;
29996 return ix86_cost->scalar_store_cost;
29999 return ix86_cost->vec_stmt_cost;
30002 return ix86_cost->vec_align_load_cost;
30005 return ix86_cost->vec_store_cost;
30007 case vec_to_scalar:
30008 return ix86_cost->vec_to_scalar_cost;
30010 case scalar_to_vec:
30011 return ix86_cost->scalar_to_vec_cost;
30013 case unaligned_load:
30014 case unaligned_store:
30015 return ix86_cost->vec_unalign_load_cost;
30017 case cond_branch_taken:
30018 return ix86_cost->cond_taken_branch_cost;
30020 case cond_branch_not_taken:
30021 return ix86_cost->cond_not_taken_branch_cost;
30027 gcc_unreachable ();
30032 /* Implement targetm.vectorize.builtin_vec_perm. */
30035 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30037 tree itype = TREE_TYPE (vec_type);
30038 bool u = TYPE_UNSIGNED (itype);
30039 enum machine_mode vmode = TYPE_MODE (vec_type);
30040 enum ix86_builtins fcode;
30041 bool ok = TARGET_SSE2;
30047 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30050 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30052 itype = ix86_get_builtin_type (IX86_BT_DI);
30057 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30061 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30063 itype = ix86_get_builtin_type (IX86_BT_SI);
30067 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30070 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30073 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30076 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30086 *mask_type = itype;
30087 return ix86_builtins[(int) fcode];
30090 /* Return a vector mode with twice as many elements as VMODE. */
30091 /* ??? Consider moving this to a table generated by genmodes.c. */
30093 static enum machine_mode
30094 doublesize_vector_mode (enum machine_mode vmode)
30098 case V2SFmode: return V4SFmode;
30099 case V1DImode: return V2DImode;
30100 case V2SImode: return V4SImode;
30101 case V4HImode: return V8HImode;
30102 case V8QImode: return V16QImode;
30104 case V2DFmode: return V4DFmode;
30105 case V4SFmode: return V8SFmode;
30106 case V2DImode: return V4DImode;
30107 case V4SImode: return V8SImode;
30108 case V8HImode: return V16HImode;
30109 case V16QImode: return V32QImode;
30111 case V4DFmode: return V8DFmode;
30112 case V8SFmode: return V16SFmode;
30113 case V4DImode: return V8DImode;
30114 case V8SImode: return V16SImode;
30115 case V16HImode: return V32HImode;
30116 case V32QImode: return V64QImode;
30119 gcc_unreachable ();
30123 /* Construct (set target (vec_select op0 (parallel perm))) and
30124 return true if that's a valid instruction in the active ISA. */
30127 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30129 rtx rperm[MAX_VECT_LEN], x;
30132 for (i = 0; i < nelt; ++i)
30133 rperm[i] = GEN_INT (perm[i]);
30135 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30136 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30137 x = gen_rtx_SET (VOIDmode, target, x);
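/* E.g. for V4SFmode and perm == {1,0,3,2} the insn built here is

     (set (reg:V4SF target)
          (vec_select:V4SF (reg:V4SF op0)
            (parallel [(const_int 1) (const_int 0)
                       (const_int 3) (const_int 2)])))

   which recog below may match as, say, shufps, or vpermilps when
   AVX is enabled.  */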
30140 if (recog_memoized (x) < 0)
30148 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30151 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30152 const unsigned char *perm, unsigned nelt)
30154 enum machine_mode v2mode;
30157 v2mode = doublesize_vector_mode (GET_MODE (op0));
30158 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30159 return expand_vselect (target, x, perm, nelt);
30162 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30163 in terms of blendp[sd] / pblendw / pblendvb. */
30166 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30168 enum machine_mode vmode = d->vmode;
30169 unsigned i, mask, nelt = d->nelt;
30170 rtx target, op0, op1, x;
30172 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30174 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30177 /* This is a blend, not a permute. Elements must stay in their
30178 respective lanes. */
30179 for (i = 0; i < nelt; ++i)
30181 unsigned e = d->perm[i];
30182 if (!(e == i || e == i + nelt))
30189 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30190 decision should be extracted elsewhere, so that we only try that
30191 sequence once all budget==3 options have been tried. */
30193 /* For bytes, see if bytes move in pairs so we can use pblendw with
30194 an immediate argument, rather than pblendvb with a vector argument. */
30195 if (vmode == V16QImode)
30197 bool pblendw_ok = true;
30198 for (i = 0; i < 16 && pblendw_ok; i += 2)
30199 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30203 rtx rperm[16], vperm;
30205 for (i = 0; i < nelt; ++i)
30206 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30208 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30209 vperm = force_reg (V16QImode, vperm);
30211 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
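/* pblendvb selects on the sign bit of each control byte, so the
   const0_rtx entries above keep the op0 byte and the constm1_rtx
   (0xff) entries take the corresponding byte from op1.  */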
30216 target = d->target;
30228 for (i = 0; i < nelt; ++i)
30229 mask |= (d->perm[i] >= nelt) << i;
30233 for (i = 0; i < 2; ++i)
30234 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30238 for (i = 0; i < 4; ++i)
30239 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30243 for (i = 0; i < 8; ++i)
30244 mask |= (d->perm[i * 2] >= 16) << i;
30248 target = gen_lowpart (vmode, target);
30249 op0 = gen_lowpart (vmode, op0);
30250 op1 = gen_lowpart (vmode, op1);
30254 gcc_unreachable ();
30257 /* This matches five different patterns for the different modes. */
30258 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30259 x = gen_rtx_SET (VOIDmode, target, x);
30265 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30266 in terms of the variable form of vpermilps.
30268 Note that we will already have failed the immediate form of vpermilps,
30269 which requires that the high and low part shuffle be identical; the
30270 variable form doesn't require that. */
30273 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30275 rtx rperm[8], vperm;
30278 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30281 /* We can only permute within the 128-bit lane. */
30282 for (i = 0; i < 8; ++i)
30284 unsigned e = d->perm[i];
30285 if (i < 4 ? e >= 4 : e < 4)
30292 for (i = 0; i < 8; ++i)
30294 unsigned e = d->perm[i];
30296 /* Within each 128-bit lane, the elements of op0 are numbered
30297 from 0 and the elements of op1 are numbered from 4. */
30303 rperm[i] = GEN_INT (e);
30306 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30307 vperm = force_reg (V8SImode, vperm);
30308 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
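/* E.g. the lane-symmetric V8SF permutation {1,0,3,2,5,4,7,6}
   becomes the control vector {1,0,3,2,1,0,3,2}; the variable
   vpermilps only ever selects within each element's own 128-bit
   lane.  */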
30313 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30314 in terms of pshufb or vpperm. */
30317 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30319 unsigned i, nelt, eltsz;
30320 rtx rperm[16], vperm, target, op0, op1;
30322 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30324 if (GET_MODE_SIZE (d->vmode) != 16)
30331 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
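/* E.g. for V8HImode eltsz is 2, so the loop below expands the
   selection of element E into position I to the byte selectors
   2*E and 2*E+1 placed in byte slots 2*I and 2*I+1 of the
   V16QImode control vector.  */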
30333 for (i = 0; i < nelt; ++i)
30335 unsigned j, e = d->perm[i];
30336 for (j = 0; j < eltsz; ++j)
30337 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30340 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30341 vperm = force_reg (V16QImode, vperm);
30343 target = gen_lowpart (V16QImode, d->target);
30344 op0 = gen_lowpart (V16QImode, d->op0);
30345 if (d->op0 == d->op1)
30346 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30349 op1 = gen_lowpart (V16QImode, d->op1);
30350 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30356 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30357 in a single instruction. */
30360 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30362 unsigned i, nelt = d->nelt;
30363 unsigned char perm2[MAX_VECT_LEN];
30365 /* Check plain VEC_SELECT first, because AVX has instructions that could
30366 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30367 input where SEL+CONCAT may not. */
30368 if (d->op0 == d->op1)
30370 int mask = nelt - 1;
30372 for (i = 0; i < nelt; i++)
30373 perm2[i] = d->perm[i] & mask;
30375 if (expand_vselect (d->target, d->op0, perm2, nelt))
30378 /* There are plenty of patterns in sse.md that are written for
30379 SEL+CONCAT and are not replicated for a single op. Perhaps
30380 that should be changed, to avoid the nastiness here. */
30382 /* Recognize interleave style patterns, which means incrementing
30383 every other permutation operand. */
30384 for (i = 0; i < nelt; i += 2)
30386 perm2[i] = d->perm[i] & mask;
30387 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30389 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30392 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
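/* E.g. the single-operand V4SF permutation {2,0,3,1} is retried
   as the selection {2,0,7,5} on (vec_concat op0 op0), which is
   the shape the shufps pattern matches.  */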
30395 for (i = 0; i < nelt; i += 4)
30397 perm2[i + 0] = d->perm[i + 0] & mask;
30398 perm2[i + 1] = d->perm[i + 1] & mask;
30399 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30400 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30403 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30408 /* Finally, try the fully general two operand permute. */
30409 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30412 /* Recognize interleave style patterns with reversed operands. */
30413 if (d->op0 != d->op1)
30415 for (i = 0; i < nelt; ++i)
30417 unsigned e = d->perm[i];
30425 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30429 /* Try the SSE4.1 blend variable merge instructions. */
30430 if (expand_vec_perm_blend (d))
30433 /* Try one of the AVX vpermil variable permutations. */
30434 if (expand_vec_perm_vpermil (d))
30437 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30438 if (expand_vec_perm_pshufb (d))
30444 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30445 in terms of a pair of pshuflw + pshufhw instructions. */
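/* E.g. the V8HI permutation {2,1,0,3,7,5,6,4} qualifies: pshuflw
   reorders the low quadword as {2,1,0,3} and pshufhw then
   reorders the high quadword as {7,5,6,4}.  */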
30448 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30450 unsigned char perm2[MAX_VECT_LEN];
30454 if (d->vmode != V8HImode || d->op0 != d->op1)
30457 /* The two permutations only operate in 64-bit lanes. */
30458 for (i = 0; i < 4; ++i)
30459 if (d->perm[i] >= 4)
30461 for (i = 4; i < 8; ++i)
30462 if (d->perm[i] < 4)
30468 /* Emit the pshuflw. */
30469 memcpy (perm2, d->perm, 4);
30470 for (i = 4; i < 8; ++i)
30472 ok = expand_vselect (d->target, d->op0, perm2, 8);
30475 /* Emit the pshufhw. */
30476 memcpy (perm2 + 4, d->perm + 4, 4);
30477 for (i = 0; i < 4; ++i)
30479 ok = expand_vselect (d->target, d->target, perm2, 8);
30485 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30486 the permutation using the SSSE3 palignr instruction. This succeeds
30487 when all of the elements in PERM fit within one vector and we merely
30488 need to shift them down so that a single vector permutation has a
30489 chance to succeed. */
30492 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30494 unsigned i, nelt = d->nelt;
30499 /* Even with AVX, palignr only operates on 128-bit vectors. */
30500 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30503 min = nelt, max = 0;
30504 for (i = 0; i < nelt; ++i)
30506 unsigned e = d->perm[i];
30512 if (min == 0 || max - min >= nelt)
30515 /* Given that we have SSSE3, we know we'll be able to implement the
30516 remaining single-operand permutation with pshufb after the palignr. */
30520 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30521 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30522 gen_lowpart (TImode, d->op1),
30523 gen_lowpart (TImode, d->op0), shift));
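/* E.g. a V16QI permutation drawing only on elements 3..18 of the
   op1:op0 double-wide value has min == 3, so the palignr above
   shifts everything down by three bytes (the shift operand is in
   bits) and the indices are rebased below to 0..15 for a
   single-operand shuffle.  */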
30525 d->op0 = d->op1 = d->target;
30528 for (i = 0; i < nelt; ++i)
30530 unsigned e = d->perm[i] - min;
30536 /* Test for the degenerate case where the alignment by itself
30537 produces the desired permutation. */
30541 ok = expand_vec_perm_1 (d);
30547 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30548 a two vector permutation into a single vector permutation by using
30549 an interleave operation to merge the vectors. */
30552 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30554 struct expand_vec_perm_d dremap, dfinal;
30555 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30556 unsigned contents, h1, h2, h3, h4;
30557 unsigned char remap[2 * MAX_VECT_LEN];
30561 if (d->op0 == d->op1)
30564 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30565 lanes. We can use similar techniques with the vperm2f128 instruction,
30566 but it requires slightly different logic. */
30567 if (GET_MODE_SIZE (d->vmode) != 16)
30570 /* Examine where the elements come from. */
30572 for (i = 0; i < nelt; ++i)
30573 contents |= 1u << d->perm[i];
30575 /* Split the two input vectors into 4 halves. */
30576 h1 = (1u << nelt2) - 1;
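/* E.g. for nelt == 4 the half masks are h1 == 0x3 (low half of
   op0), h2 == 0xc (high half of op0), h3 == 0x30 (low half of
   op1) and h4 == 0xc0 (high half of op1).  */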
30581 memset (remap, 0xff, sizeof (remap));
30584 /* If the elements are all from the low halves, use interleave low;
30585 similarly for interleave high. If the elements come from mismatched
30586 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
30587 if ((contents & (h1 | h3)) == contents)
30589 for (i = 0; i < nelt2; ++i)
30592 remap[i + nelt] = i * 2 + 1;
30593 dremap.perm[i * 2] = i;
30594 dremap.perm[i * 2 + 1] = i + nelt;
30597 else if ((contents & (h2 | h4)) == contents)
30599 for (i = 0; i < nelt2; ++i)
30601 remap[i + nelt2] = i * 2;
30602 remap[i + nelt + nelt2] = i * 2 + 1;
30603 dremap.perm[i * 2] = i + nelt2;
30604 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30607 else if ((contents & (h1 | h4)) == contents)
30609 for (i = 0; i < nelt2; ++i)
30612 remap[i + nelt + nelt2] = i + nelt2;
30613 dremap.perm[i] = i;
30614 dremap.perm[i + nelt2] = i + nelt + nelt2;
30618 dremap.vmode = V2DImode;
30620 dremap.perm[0] = 0;
30621 dremap.perm[1] = 3;
30624 else if ((contents & (h2 | h3)) == contents)
30626 for (i = 0; i < nelt2; ++i)
30628 remap[i + nelt2] = i;
30629 remap[i + nelt] = i + nelt2;
30630 dremap.perm[i] = i + nelt2;
30631 dremap.perm[i + nelt2] = i + nelt;
30635 dremap.vmode = V2DImode;
30637 dremap.perm[0] = 1;
30638 dremap.perm[1] = 2;
30644 /* Use the remapping array set up above to move the elements from their
30645 swizzled locations into their final destinations. */
30647 for (i = 0; i < nelt; ++i)
30649 unsigned e = remap[d->perm[i]];
30650 gcc_assert (e < nelt);
30651 dfinal.perm[i] = e;
30653 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30654 dfinal.op1 = dfinal.op0;
30655 dremap.target = dfinal.op0;
30657 /* Test if the final remap can be done with a single insn. For V4SFmode or
30658 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30660 ok = expand_vec_perm_1 (&dfinal);
30661 seq = get_insns ();
30667 if (dremap.vmode != dfinal.vmode)
30669 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30670 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30671 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30674 ok = expand_vec_perm_1 (&dremap);
30681 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30682 permutation with two pshufb insns and an ior. We should have already
30683 failed all two instruction sequences. */
30686 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30688 rtx rperm[2][16], vperm, l, h, op, m128;
30689 unsigned int i, nelt, eltsz;
30691 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30693 gcc_assert (d->op0 != d->op1);
30696 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30698 /* Generate two permutation masks. If the required element is within
30699 the given vector it is shuffled into the proper lane. If the required
30700 element is in the other vector, force a zero into the lane by setting
30701 bit 7 in the permutation mask. */
30702 m128 = GEN_INT (-128);
30703 for (i = 0; i < nelt; ++i)
30705 unsigned j, e = d->perm[i];
30706 unsigned which = (e >= nelt);
30710 for (j = 0; j < eltsz; ++j)
30712 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30713 rperm[1-which][i*eltsz + j] = m128;
30717 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30718 vperm = force_reg (V16QImode, vperm);
30720 l = gen_reg_rtx (V16QImode);
30721 op = gen_lowpart (V16QImode, d->op0);
30722 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30724 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30725 vperm = force_reg (V16QImode, vperm);
30727 h = gen_reg_rtx (V16QImode);
30728 op = gen_lowpart (V16QImode, d->op1);
30729 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30731 op = gen_lowpart (V16QImode, d->target);
30732 emit_insn (gen_iorv16qi3 (op, l, h));
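/* E.g. if d->perm[0] == 18 on V16QI, byte 0 of the first mask is
   0x80 (forced zero) and byte 0 of the second mask selects byte 2,
   so the ior above merges byte 2 of op1 into element 0 of the
   result.  */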
30737 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30738 and extract-odd permutations. */
30741 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30743 rtx t1, t2, t3, t4;
30748 t1 = gen_reg_rtx (V4DFmode);
30749 t2 = gen_reg_rtx (V4DFmode);
30751 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30752 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30753 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30755 /* Now an unpck[lh]pd will produce the result required. */
30757 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30759 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30765 static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
30766 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30767 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30769 t1 = gen_reg_rtx (V8SFmode);
30770 t2 = gen_reg_rtx (V8SFmode);
30771 t3 = gen_reg_rtx (V8SFmode);
30772 t4 = gen_reg_rtx (V8SFmode);
30774 /* Shuffle within the 128-bit lanes to produce:
30775 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30776 expand_vselect (t1, d->op0, perm1, 8);
30777 expand_vselect (t2, d->op1, perm1, 8);
30779 /* Shuffle the lanes around to produce:
30780 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30781 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30782 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30784 /* Now a vpermil2p will produce the result required. */
30785 /* ??? The vpermil2p requires a vector constant. Another option
30786 is a unpck[lh]ps to merge the two vectors to produce
30787 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30788 vpermilps to get the elements into the final order. */
30791 memcpy (d->perm, odd ? permo: perme, 8);
30792 expand_vec_perm_vpermil (d);
30800 /* These are always directly implementable by expand_vec_perm_1. */
30801 gcc_unreachable ();
30805 return expand_vec_perm_pshufb2 (d);
30808 /* We need 2*log2(N)-1 operations to achieve odd/even
30809 with interleave. */
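/* Concretely that is five insns for V8HImode (2*3 - 1) and seven
   for V16QImode (2*4 - 1), counting the final merge emitted at
   the end of each sequence.  */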
30810 t1 = gen_reg_rtx (V8HImode);
30811 t2 = gen_reg_rtx (V8HImode);
30812 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30813 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30814 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30815 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30817 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30819 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30826 return expand_vec_perm_pshufb2 (d);
30829 t1 = gen_reg_rtx (V16QImode);
30830 t2 = gen_reg_rtx (V16QImode);
30831 t3 = gen_reg_rtx (V16QImode);
30832 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30833 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30834 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30835 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30836 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30837 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30839 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30841 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30847 gcc_unreachable ();
30853 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30854 extract-even and extract-odd permutations. */
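/* E.g. for V4SFmode this matches {0,2,4,6} (extract even) and
   {1,3,5,7} (extract odd), taking alternate elements from the
   concatenation of the two operands.  */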
30857 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30859 unsigned i, odd, nelt = d->nelt;
30862 if (odd != 0 && odd != 1)
30865 for (i = 1; i < nelt; ++i)
30866 if (d->perm[i] != 2 * i + odd)
30869 return expand_vec_perm_even_odd_1 (d, odd);
30872 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
30873 permutations. We assume that expand_vec_perm_1 has already failed. */
30876 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
30878 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
30879 enum machine_mode vmode = d->vmode;
30880 unsigned char perm2[4];
30888 /* These are special-cased in sse.md so that we can optionally
30889 use the vbroadcast instruction. They expand to two insns
30890 if the input happens to be in a register. */
30891 gcc_unreachable ();
30897 /* These are always implementable using standard shuffle patterns. */
30898 gcc_unreachable ();
30902 /* These can be implemented via interleave. We save one insn by
30903 stopping once we have promoted to V4SImode and then use pshufd. */
30906 optab otab = vec_interleave_low_optab;
30910 otab = vec_interleave_high_optab;
30915 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
30916 vmode = get_mode_wider_vector (vmode);
30917 op0 = gen_lowpart (vmode, op0);
30919 while (vmode != V4SImode);
30921 memset (perm2, elt, 4);
30922 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
30927 gcc_unreachable ();
30931 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30932 broadcast permutations. */
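/* E.g. perm {3,3,3,3} on V4SF replicates element 3 of op0 into
   all four result elements.  */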
30935 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30937 unsigned i, elt, nelt = d->nelt;
30939 if (d->op0 != d->op1)
30943 for (i = 1; i < nelt; ++i)
30944 if (d->perm[i] != elt)
30947 return expand_vec_perm_broadcast_1 (d);
30950 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30951 With all of the interface bits taken care of, perform the expansion
30952 in D and return true on success. */
30955 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30957 /* Try a single instruction expansion. */
30958 if (expand_vec_perm_1 (d))
30961 /* Try sequences of two instructions. */
30963 if (expand_vec_perm_pshuflw_pshufhw (d))
30966 if (expand_vec_perm_palignr (d))
30969 if (expand_vec_perm_interleave2 (d))
30972 if (expand_vec_perm_broadcast (d))
30975 /* Try sequences of three instructions. */
30977 if (expand_vec_perm_pshufb2 (d))
30980 /* ??? Look for narrow permutations whose element orderings would
30981 allow the promotion to a wider mode. */
30983 /* ??? Look for sequences of interleave or a wider permute that place
30984 the data into the correct lanes for a half-vector shuffle like
30985 pshuf[lh]w or vpermilps. */
30987 /* ??? Look for sequences of interleave that produce the desired results.
30988 The combinatorics of punpck[lh] get pretty ugly... */
30990 if (expand_vec_perm_even_odd (d))
30996 /* Extract the values from the vector CST into the permutation array in D.
30997 Return 0 on error, 1 if all values from the permutation come from the
30998 first vector, 2 if all values from the second vector, and 3 otherwise. */
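/* E.g. with nelt == 4 the constant {5,6,4,7} selects only from
   the second vector, so the return value is 2 and the indices
   are folded below to {1,2,0,3}.  */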
31001 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31003 tree list = TREE_VECTOR_CST_ELTS (cst);
31004 unsigned i, nelt = d->nelt;
31007 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31009 unsigned HOST_WIDE_INT e;
31011 if (!host_integerp (TREE_VALUE (list), 1))
31013 e = tree_low_cst (TREE_VALUE (list), 1);
31017 ret |= (e < nelt ? 1 : 2);
31020 gcc_assert (list == NULL);
31022 /* If all elements are from the second vector, fold the indices back to the first vector's range. */
31024 for (i = 0; i < nelt; ++i)
31025 d->perm[i] -= nelt;
31031 ix86_expand_vec_perm_builtin (tree exp)
31033 struct expand_vec_perm_d d;
31034 tree arg0, arg1, arg2;
31036 arg0 = CALL_EXPR_ARG (exp, 0);
31037 arg1 = CALL_EXPR_ARG (exp, 1);
31038 arg2 = CALL_EXPR_ARG (exp, 2);
31040 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31041 d.nelt = GET_MODE_NUNITS (d.vmode);
31042 d.testing_p = false;
31043 gcc_assert (VECTOR_MODE_P (d.vmode));
31045 if (TREE_CODE (arg2) != VECTOR_CST)
31047 error_at (EXPR_LOCATION (exp),
31048 "vector permutation requires vector constant");
31052 switch (extract_vec_perm_cst (&d, arg2))
31058 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31062 if (!operand_equal_p (arg0, arg1, 0))
31064 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31065 d.op0 = force_reg (d.vmode, d.op0);
31066 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31067 d.op1 = force_reg (d.vmode, d.op1);
31071 /* The elements of PERM do not suggest that only the first operand
31072 is used, but both operands are identical. Allow easier matching
31073 of the permutation by folding the permutation into the single
31076 unsigned i, nelt = d.nelt;
31077 for (i = 0; i < nelt; ++i)
31078 if (d.perm[i] >= nelt)
31084 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31085 d.op0 = force_reg (d.vmode, d.op0);
31090 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31091 d.op0 = force_reg (d.vmode, d.op0);
31096 d.target = gen_reg_rtx (d.vmode);
31097 if (ix86_expand_vec_perm_builtin_1 (&d))
31100 /* For compiler-generated permutations, we should never get here, because
31101 the compiler should also be checking the ok hook. But since this is a
31102 builtin the user has access to, don't abort. */
31106 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31109 sorry ("vector permutation (%d %d %d %d)",
31110 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31113 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31114 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31115 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31118 sorry ("vector permutation "
31119 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31120 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31121 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31122 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31123 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31126 gcc_unreachable ();
31129 return CONST0_RTX (d.vmode);
31132 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31135 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31137 struct expand_vec_perm_d d;
31141 d.vmode = TYPE_MODE (vec_type);
31142 d.nelt = GET_MODE_NUNITS (d.vmode);
31143 d.testing_p = true;
31145 /* Given sufficient ISA support we can just return true here
31146 for selected vector modes. */
31147 if (GET_MODE_SIZE (d.vmode) == 16)
31149 /* All implementable with a single vpperm insn. */
31152 /* All implementable with 2 pshufb + 1 ior. */
31155 /* All implementable with shufpd or unpck[lh]pd. */
31160 vec_mask = extract_vec_perm_cst (&d, mask);
31162 /* This hook cannot be called in response to something that the
31163 user does (unlike the builtin expander), so we should never see
31164 an error generated from the extract. */
31165 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31166 one_vec = (vec_mask != 3);
31168 /* Implementable with shufps or pshufd. */
31169 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31172 /* Otherwise we have to go through the motions and see if we can
31173 figure out how to generate the requested permutation. */
31174 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31175 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31177 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31180 ret = ix86_expand_vec_perm_builtin_1 (&d);
31187 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31189 struct expand_vec_perm_d d;
31195 d.vmode = GET_MODE (targ);
31196 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31197 d.testing_p = false;
31199 for (i = 0; i < nelt; ++i)
31200 d.perm[i] = i * 2 + odd;
31202 /* We'll either be able to implement the permutation directly... */
31203 if (expand_vec_perm_1 (&d))
31206 /* ... or we use the special-case patterns. */
31207 expand_vec_perm_even_odd_1 (&d, odd);
31210 /* Return the calling-ABI-specific va_list type node appropriate
31211 for FNDECL. */
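/* E.g. on a SysV x86_64 target, a function declared with
   __attribute__ ((ms_abi)) gets ms_va_list_type_node here rather
   than the default sysv_va_list_type_node.  */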
31214 ix86_fn_abi_va_list (tree fndecl)
31217 return va_list_type_node;
31218 gcc_assert (fndecl != NULL_TREE);
31220 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31221 return ms_va_list_type_node;
31223 return sysv_va_list_type_node;
31226 /* Returns the canonical va_list type specified by TYPE. If no
31227 valid TYPE is provided, it returns NULL_TREE. */
31230 ix86_canonical_va_list_type (tree type)
31234 /* Resolve references and pointers to va_list type. */
31235 if (TREE_CODE (type) == MEM_REF)
31236 type = TREE_TYPE (type);
31237 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31238 type = TREE_TYPE (type);
31239 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31240 type = TREE_TYPE (type);
31244 wtype = va_list_type_node;
31245 gcc_assert (wtype != NULL_TREE);
31247 if (TREE_CODE (wtype) == ARRAY_TYPE)
31249 /* If va_list is an array type, the argument may have decayed
31250 to a pointer type, e.g. by being passed to another function.
31251 In that case, unwrap both types so that we can compare the
31252 underlying records. */
31253 if (TREE_CODE (htype) == ARRAY_TYPE
31254 || POINTER_TYPE_P (htype))
31256 wtype = TREE_TYPE (wtype);
31257 htype = TREE_TYPE (htype);
31260 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31261 return va_list_type_node;
31262 wtype = sysv_va_list_type_node;
31263 gcc_assert (wtype != NULL_TREE);
31265 if (TREE_CODE (wtype) == ARRAY_TYPE)
31267 /* If va_list is an array type, the argument may have decayed
31268 to a pointer type, e.g. by being passed to another function.
31269 In that case, unwrap both types so that we can compare the
31270 underlying records. */
31271 if (TREE_CODE (htype) == ARRAY_TYPE
31272 || POINTER_TYPE_P (htype))
31274 wtype = TREE_TYPE (wtype);
31275 htype = TREE_TYPE (htype);
31278 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31279 return sysv_va_list_type_node;
31280 wtype = ms_va_list_type_node;
31281 gcc_assert (wtype != NULL_TREE);
31283 if (TREE_CODE (wtype) == ARRAY_TYPE)
31285 /* If va_list is an array type, the argument may have decayed
31286 to a pointer type, e.g. by being passed to another function.
31287 In that case, unwrap both types so that we can compare the
31288 underlying records. */
31289 if (TREE_CODE (htype) == ARRAY_TYPE
31290 || POINTER_TYPE_P (htype))
31292 wtype = TREE_TYPE (wtype);
31293 htype = TREE_TYPE (htype);
31296 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31297 return ms_va_list_type_node;
31300 return std_canonical_va_list_type (type);
31303 /* Iterate through the target-specific builtin types for va_list.
31304 IDX denotes the iterator, *PTREE is set to the type node of
31305 the va_list builtin, and *PNAME to its builtin name.
31306 Returns zero if there is no element for this index, otherwise
31307 IDX should be increased upon the next call.
31308 Note that we do not iterate over the base builtin name __builtin_va_list.
31309 Used from c_common_nodes_and_builtins. */
31312 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31322 *ptree = ms_va_list_type_node;
31323 *pname = "__builtin_ms_va_list";
31327 *ptree = sysv_va_list_type_node;
31328 *pname = "__builtin_sysv_va_list";
31336 /* Initialize the GCC target structure. */
31337 #undef TARGET_RETURN_IN_MEMORY
31338 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31340 #undef TARGET_LEGITIMIZE_ADDRESS
31341 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31343 #undef TARGET_ATTRIBUTE_TABLE
31344 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31345 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31346 # undef TARGET_MERGE_DECL_ATTRIBUTES
31347 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31350 #undef TARGET_COMP_TYPE_ATTRIBUTES
31351 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31353 #undef TARGET_INIT_BUILTINS
31354 #define TARGET_INIT_BUILTINS ix86_init_builtins
31355 #undef TARGET_BUILTIN_DECL
31356 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31357 #undef TARGET_EXPAND_BUILTIN
31358 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31360 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31361 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31362 ix86_builtin_vectorized_function
31364 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31365 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31367 #undef TARGET_BUILTIN_RECIPROCAL
31368 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31370 #undef TARGET_ASM_FUNCTION_EPILOGUE
31371 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31373 #undef TARGET_ENCODE_SECTION_INFO
31374 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31375 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31377 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31380 #undef TARGET_ASM_OPEN_PAREN
31381 #define TARGET_ASM_OPEN_PAREN ""
31382 #undef TARGET_ASM_CLOSE_PAREN
31383 #define TARGET_ASM_CLOSE_PAREN ""
31385 #undef TARGET_ASM_BYTE_OP
31386 #define TARGET_ASM_BYTE_OP ASM_BYTE
31388 #undef TARGET_ASM_ALIGNED_HI_OP
31389 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31390 #undef TARGET_ASM_ALIGNED_SI_OP
31391 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31393 #undef TARGET_ASM_ALIGNED_DI_OP
31394 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31397 #undef TARGET_PROFILE_BEFORE_PROLOGUE
31398 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
31400 #undef TARGET_ASM_UNALIGNED_HI_OP
31401 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31402 #undef TARGET_ASM_UNALIGNED_SI_OP
31403 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31404 #undef TARGET_ASM_UNALIGNED_DI_OP
31405 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31407 #undef TARGET_PRINT_OPERAND
31408 #define TARGET_PRINT_OPERAND ix86_print_operand
31409 #undef TARGET_PRINT_OPERAND_ADDRESS
31410 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31411 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31412 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31414 #undef TARGET_SCHED_ADJUST_COST
31415 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31416 #undef TARGET_SCHED_ISSUE_RATE
31417 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31418 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31419 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31420 ia32_multipass_dfa_lookahead
31422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31423 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31426 #undef TARGET_HAVE_TLS
31427 #define TARGET_HAVE_TLS true
31429 #undef TARGET_CANNOT_FORCE_CONST_MEM
31430 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31431 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31432 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31434 #undef TARGET_DELEGITIMIZE_ADDRESS
31435 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31437 #undef TARGET_MS_BITFIELD_LAYOUT_P
31438 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31441 #undef TARGET_BINDS_LOCAL_P
31442 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31444 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31445 #undef TARGET_BINDS_LOCAL_P
31446 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31449 #undef TARGET_ASM_OUTPUT_MI_THUNK
31450 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31451 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31452 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31454 #undef TARGET_ASM_FILE_START
31455 #define TARGET_ASM_FILE_START x86_file_start
31457 #undef TARGET_DEFAULT_TARGET_FLAGS
31458 #define TARGET_DEFAULT_TARGET_FLAGS \
31460 | TARGET_SUBTARGET_DEFAULT \
31461 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31464 #undef TARGET_HANDLE_OPTION
31465 #define TARGET_HANDLE_OPTION ix86_handle_option
31467 #undef TARGET_REGISTER_MOVE_COST
31468 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31469 #undef TARGET_MEMORY_MOVE_COST
31470 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31471 #undef TARGET_RTX_COSTS
31472 #define TARGET_RTX_COSTS ix86_rtx_costs
31473 #undef TARGET_ADDRESS_COST
31474 #define TARGET_ADDRESS_COST ix86_address_cost
31476 #undef TARGET_FIXED_CONDITION_CODE_REGS
31477 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31478 #undef TARGET_CC_MODES_COMPATIBLE
31479 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31481 #undef TARGET_MACHINE_DEPENDENT_REORG
31482 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31484 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31485 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31487 #undef TARGET_BUILD_BUILTIN_VA_LIST
31488 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31490 #undef TARGET_ENUM_VA_LIST_P
31491 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31493 #undef TARGET_FN_ABI_VA_LIST
31494 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31496 #undef TARGET_CANONICAL_VA_LIST_TYPE
31497 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31499 #undef TARGET_EXPAND_BUILTIN_VA_START
31500 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31502 #undef TARGET_MD_ASM_CLOBBERS
31503 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31505 #undef TARGET_PROMOTE_PROTOTYPES
31506 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31507 #undef TARGET_STRUCT_VALUE_RTX
31508 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31509 #undef TARGET_SETUP_INCOMING_VARARGS
31510 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31511 #undef TARGET_MUST_PASS_IN_STACK
31512 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31513 #undef TARGET_FUNCTION_ARG_ADVANCE
31514 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31515 #undef TARGET_FUNCTION_ARG
31516 #define TARGET_FUNCTION_ARG ix86_function_arg
31517 #undef TARGET_PASS_BY_REFERENCE
31518 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31519 #undef TARGET_INTERNAL_ARG_POINTER
31520 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31521 #undef TARGET_UPDATE_STACK_BOUNDARY
31522 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31523 #undef TARGET_GET_DRAP_RTX
31524 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31525 #undef TARGET_STRICT_ARGUMENT_NAMING
31526 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31527 #undef TARGET_STATIC_CHAIN
31528 #define TARGET_STATIC_CHAIN ix86_static_chain
31529 #undef TARGET_TRAMPOLINE_INIT
31530 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31531 #undef TARGET_RETURN_POPS_ARGS
31532 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31534 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31535 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31537 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31538 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31540 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31541 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31543 #undef TARGET_C_MODE_FOR_SUFFIX
31544 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31547 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31548 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31551 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31552 #undef TARGET_INSERT_ATTRIBUTES
31553 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31556 #undef TARGET_MANGLE_TYPE
31557 #define TARGET_MANGLE_TYPE ix86_mangle_type
31559 #undef TARGET_STACK_PROTECT_FAIL
31560 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31562 #undef TARGET_FUNCTION_VALUE
31563 #define TARGET_FUNCTION_VALUE ix86_function_value
31565 #undef TARGET_FUNCTION_VALUE_REGNO_P
31566 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31568 #undef TARGET_SECONDARY_RELOAD
31569 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31571 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31572 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31573 ix86_builtin_vectorization_cost
31574 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31575 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31576 ix86_vectorize_builtin_vec_perm
31577 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31578 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31579 ix86_vectorize_builtin_vec_perm_ok
31581 #undef TARGET_SET_CURRENT_FUNCTION
31582 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31584 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31585 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31587 #undef TARGET_OPTION_SAVE
31588 #define TARGET_OPTION_SAVE ix86_function_specific_save
31590 #undef TARGET_OPTION_RESTORE
31591 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31593 #undef TARGET_OPTION_PRINT
31594 #define TARGET_OPTION_PRINT ix86_function_specific_print
31596 #undef TARGET_CAN_INLINE_P
31597 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31599 #undef TARGET_EXPAND_TO_RTL_HOOK
31600 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31602 #undef TARGET_LEGITIMATE_ADDRESS_P
31603 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31605 #undef TARGET_IRA_COVER_CLASSES
31606 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31608 #undef TARGET_FRAME_POINTER_REQUIRED
31609 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31611 #undef TARGET_CAN_ELIMINATE
31612 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31614 #undef TARGET_ASM_CODE_END
31615 #define TARGET_ASM_CODE_END ix86_code_end
31617 struct gcc_target targetm = TARGET_INITIALIZER;
31619 #include "gt-i386.h"