/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
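
/* For example, MODE_INDEX (SImode) is 2, so the rtx-cost code can select
   the SImode entry of the five-element multiply and divide arrays defined
   below with something like

     total = cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit;

   This is an illustrative sketch; the field names mult_init and mult_bit
   are assumed to match struct processor_costs in i386.h.  */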
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
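
/* A note on the string-operation tables below: each memcpy/memset slot in
   struct processor_costs holds a pair of stringop_algs descriptors, the
   first used for 32-bit code and the second for 64-bit code.
   DUMMY_STRINGOP_ALGS fills whichever half of the pair a given CPU can
   never execute in.  Within one descriptor, the leading algorithm handles
   blocks whose size is unknown at compile time, and each {max, alg} pair
   selects ALG for known sizes up to MAX bytes, with -1 meaning "no upper
   bound".  (Summary of the expected i386.h layout, stated here for the
   reader's convenience.)  */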
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),        /* cost of an add instruction */
  COSTS_N_BYTES (3),        /* cost of a lea instruction */
  COSTS_N_BYTES (2),        /* variable shift costs */
  COSTS_N_BYTES (3),        /* constant shift costs */
  {COSTS_N_BYTES (3),       /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),       /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),       /* HI */
   COSTS_N_BYTES (3),       /* SI */
   COSTS_N_BYTES (3),       /* DI */
   COSTS_N_BYTES (5)},      /* other */
  COSTS_N_BYTES (3),        /* cost of movsx */
  COSTS_N_BYTES (3),        /* cost of movzx */
  0,                        /* "large" insn */
  2,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {2, 2, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 2},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {2, 2, 2},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  3,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {3, 3},                   /* cost of storing MMX registers
                               in SImode and DImode */
  3,                        /* cost of moving SSE register */
  {3, 3, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {3, 3, 3},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_BYTES (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),        /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),        /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),        /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),        /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  1,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  1,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
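
/* The byte-based table above is meant to replace the cycle-based tables
   below when a function is optimized for size; the option-override code
   is expected to point ix86_cost at ix86_size_cost in that case.  (A
   sketch of the intended use, not a definition of the interface.)  */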
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (6),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),       /* HI */
   COSTS_N_INSNS (6),       /* SI */
   COSTS_N_INSNS (6),       /* DI */
   COSTS_N_INSNS (6)},      /* other */
  COSTS_N_INSNS (1),        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (23),      /* SI */
   COSTS_N_INSNS (23),      /* DI */
   COSTS_N_INSNS (23)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  0,                        /* size of l1 cache */
  0,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (23),       /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),       /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),       /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),      /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (3),        /* variable shift costs */
  COSTS_N_INSNS (2),        /* constant shift costs */
  {COSTS_N_INSNS (12),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),      /* HI */
   COSTS_N_INSNS (12),      /* SI */
   COSTS_N_INSNS (12),      /* DI */
   COSTS_N_INSNS (12)},     /* other */
  1,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),      /* HI */
   COSTS_N_INSNS (40),      /* SI */
   COSTS_N_INSNS (40),      /* DI */
   COSTS_N_INSNS (40)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  15,                       /* "large" insn */
  3,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {8, 8, 8},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {8, 8, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  4,                        /* size of l1 cache.  486 has 8kB cache
                               shared for code and data, so 4kB is
                               not really precise.  */
  4,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),       /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (11),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),      /* HI */
   COSTS_N_INSNS (11),      /* SI */
   COSTS_N_INSNS (11),      /* DI */
   COSTS_N_INSNS (11)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),      /* HI */
   COSTS_N_INSNS (25),      /* SI */
   COSTS_N_INSNS (25),      /* DI */
   COSTS_N_INSNS (25)},     /* other */
  COSTS_N_INSNS (3),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  6,                        /* cost for loading QImode using movzbl */
  {2, 4, 2},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 4, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  8,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 8, 16},               /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 8, 16},               /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  8,                        /* size of l2 cache */
  0,                        /* size of prefetch block */
  0,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (4),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (4),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (4)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),      /* HI */
   COSTS_N_INSNS (17),      /* SI */
   COSTS_N_INSNS (17),      /* DI */
   COSTS_N_INSNS (17)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 2, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache */
  32,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win, while for bigger blocks either rep movsl or rep movsb
     is the way to go.  Rep movsb apparently has a more expensive startup
     time in the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
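
/* Reading the descriptors above: the PentiumPro memcpy entry says that a
   block whose size is unknown at compile time is copied with rep movsl,
   while known sizes use an inline loop up to 128 bytes, an unrolled loop
   up to 1024 bytes, rep movsl up to 8192 bytes, and rep movsb beyond
   that.  */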
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (2),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (7),       /* SI */
   COSTS_N_INSNS (7),       /* DI */
   COSTS_N_INSNS (7)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),      /* HI */
   COSTS_N_INSNS (39),      /* SI */
   COSTS_N_INSNS (39),      /* DI */
   COSTS_N_INSNS (39)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  1,                        /* cost for loading QImode using movzbl */
  {1, 1, 1},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {1, 1, 1},                /* cost of storing integer registers */
  1,                        /* cost of reg,reg fld/fst */
  {1, 1, 1},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 6, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */

  1,                        /* cost of moving MMX register */
  {1, 1},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {1, 1},                   /* cost of storing MMX registers
                               in SImode and DImode */
  1,                        /* cost of moving SSE register */
  {1, 1, 1},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {1, 1, 1},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  1,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  128,                      /* size of l2 cache.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),       /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (3),       /* DI */
   COSTS_N_INSNS (3)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),      /* HI */
   COSTS_N_INSNS (18),      /* SI */
   COSTS_N_INSNS (18),      /* DI */
   COSTS_N_INSNS (18)},     /* other */
  COSTS_N_INSNS (2),        /* cost of movsx */
  COSTS_N_INSNS (2),        /* cost of movzx */
  8,                        /* "large" insn */
  4,                        /* MOVE_RATIO */
  3,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {2, 2, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  6,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  32,                       /* size of l2 cache.  Some models
                               have integrated l2 cache, but
                               optimizing for k6 is not important
                               enough to worry about that.  */
  32,                       /* size of prefetch block */
  1,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (2),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),       /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (5),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),       /* HI */
   COSTS_N_INSNS (5),       /* SI */
   COSTS_N_INSNS (5),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {4, 4},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  5,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 3, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instructions for medium-sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  5,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  3,                        /* vec_unalign_load_cost.  */
  3,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  2,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
			    /* On K8:
				MOVD reg64, xmmreg Double FSTORE 4
				MOVD reg32, xmmreg Double FSTORE 4
			       On AMDFAM10:
				MOVD reg64, xmmreg Double FADD 3
						   1/1  1/1
				MOVD reg32, xmmreg Double FADD 3
						   1/1  1/1 */
  64,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (2),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (5)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),      /* HI */
   COSTS_N_INSNS (51),      /* SI */
   COSTS_N_INSNS (83),      /* DI */
   COSTS_N_INSNS (83)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  9,                        /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {3, 4, 3},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {3, 4, 3},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {4, 4, 12},               /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {3, 3},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {4, 4, 3},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 5},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  3,                        /* MMX or SSE register to integer */
			    /* On K8:
				MOVD reg64, xmmreg Double FSTORE 4
				MOVD reg32, xmmreg Double FSTORE 4
			       On BDVER1:
				MOVD reg64, xmmreg Double FADD 3
						   1/1  1/1
				MOVD reg32, xmmreg Double FADD 3
						   1/1  1/1 */
  64,                       /* size of l1 cache.  */
  1024,                     /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                      /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (4),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),       /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instructions for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                        /* scalar_stmt_cost.  */
  2,                        /* scalar load_cost.  */
  2,                        /* scalar_store_cost.  */
  6,                        /* vec_stmt_cost.  */
  0,                        /* vec_to_scalar_cost.  */
  2,                        /* scalar_to_vec_cost.  */
  2,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  2,                        /* vec_store_cost.  */
  2,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (3),        /* cost of a lea instruction */
  COSTS_N_INSNS (4),        /* variable shift costs */
  COSTS_N_INSNS (4),        /* constant shift costs */
  {COSTS_N_INSNS (15),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),      /* HI */
   COSTS_N_INSNS (15),      /* SI */
   COSTS_N_INSNS (15),      /* DI */
   COSTS_N_INSNS (15)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),      /* HI */
   COSTS_N_INSNS (56),      /* SI */
   COSTS_N_INSNS (56),      /* DI */
   COSTS_N_INSNS (56)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  6,                        /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 5, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {2, 3, 2},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {2, 2, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 6},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {2, 2},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {2, 2},                   /* cost of storing MMX registers
                               in SImode and DImode */
  12,                       /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {2, 2, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  10,                       /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  2,                        /* Branch cost */
  COSTS_N_INSNS (5),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1),        /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (10),      /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),      /* HI */
   COSTS_N_INSNS (10),      /* SI */
   COSTS_N_INSNS (10),      /* DI */
   COSTS_N_INSNS (10)},     /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),      /* HI */
   COSTS_N_INSNS (66),      /* SI */
   COSTS_N_INSNS (66),      /* DI */
   COSTS_N_INSNS (66)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  16,                       /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  3,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  6,                        /* cost of moving MMX register */
  {12, 12},                 /* cost of loading MMX registers
                               in SImode and DImode */
  {12, 12},                 /* cost of storing MMX registers
                               in SImode and DImode */
  6,                        /* cost of moving SSE register */
  {12, 12, 12},             /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {12, 12, 12},             /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  8,                        /* MMX or SSE register to integer */
  8,                        /* size of l1 cache.  */
  1024,                     /* size of l2 cache.  */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  1,                        /* Branch cost */
  COSTS_N_INSNS (6),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),       /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (3),       /* DI */
   COSTS_N_INSNS (3)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),      /* HI */
   COSTS_N_INSNS (22),      /* SI */
   COSTS_N_INSNS (22),      /* DI */
   COSTS_N_INSNS (22)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  16,                       /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {6, 6, 6},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  2,                        /* cost of reg,reg fld/fst */
  {6, 6, 6},                /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {4, 4, 4},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {6, 6},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {4, 4},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {6, 6, 6},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {4, 4, 4},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  2,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  2048,                     /* size of l2 cache.  */
  128,                      /* size of prefetch block */
  8,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (3),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),       /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  2,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  512,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),        /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,    /* cost of a lea instruction */
  COSTS_N_INSNS (1),        /* variable shift costs */
  COSTS_N_INSNS (1),        /* constant shift costs */
  {COSTS_N_INSNS (3),       /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),       /* HI */
   COSTS_N_INSNS (3),       /* SI */
   COSTS_N_INSNS (4),       /* DI */
   COSTS_N_INSNS (2)},      /* other */
  0,                        /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),      /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),      /* HI */
   COSTS_N_INSNS (42),      /* SI */
   COSTS_N_INSNS (74),      /* DI */
   COSTS_N_INSNS (74)},     /* other */
  COSTS_N_INSNS (1),        /* cost of movsx */
  COSTS_N_INSNS (1),        /* cost of movzx */
  8,                        /* "large" insn */
  17,                       /* MOVE_RATIO */
  4,                        /* cost for loading QImode using movzbl */
  {4, 4, 4},                /* cost of loading integer registers
                               in QImode, HImode and SImode.
                               Relative to reg-reg move (2).  */
  {4, 4, 4},                /* cost of storing integer registers */
  4,                        /* cost of reg,reg fld/fst */
  {12, 12, 12},             /* cost of loading fp registers
                               in SFmode, DFmode and XFmode */
  {6, 6, 8},                /* cost of storing fp registers
                               in SFmode, DFmode and XFmode */
  2,                        /* cost of moving MMX register */
  {8, 8},                   /* cost of loading MMX registers
                               in SImode and DImode */
  {8, 8},                   /* cost of storing MMX registers
                               in SImode and DImode */
  2,                        /* cost of moving SSE register */
  {8, 8, 8},                /* cost of loading SSE registers
                               in SImode, DImode and TImode */
  {8, 8, 8},                /* cost of storing SSE registers
                               in SImode, DImode and TImode */
  5,                        /* MMX or SSE register to integer */
  32,                       /* size of l1 cache.  */
  256,                      /* size of l2 cache.  */
  64,                       /* size of prefetch block */
  6,                        /* number of parallel prefetches */
  3,                        /* Branch cost */
  COSTS_N_INSNS (8),        /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),        /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),       /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),        /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),        /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),       /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                        /* scalar_stmt_cost.  */
  1,                        /* scalar load_cost.  */
  1,                        /* scalar_store_cost.  */
  1,                        /* vec_stmt_cost.  */
  1,                        /* vec_to_scalar_cost.  */
  1,                        /* scalar_to_vec_cost.  */
  1,                        /* vec_align_load_cost.  */
  2,                        /* vec_unalign_load_cost.  */
  1,                        /* vec_store_cost.  */
  3,                        /* cond_taken_branch_cost.  */
  1,                        /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
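
/* Pointing at pentium_cost is only a conservative default; the
   option-override code is expected to retarget ix86_cost at the table for
   the CPU selected by -mtune, or at ix86_size_cost when optimizing for
   size.  */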
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of the supported
   CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
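
/* The initial_ix86_tune_features table below is collapsed into the boolean
   ix86_tune_features array at option-override time by masking each entry
   with the bit of the processor selected by -mtune, roughly:

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   (Sketch of the mechanism as implemented elsewhere in this file.)  */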
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
1492 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips, which treat 128-bit
1509 SSE registers as single units, and K8-based chips, which split SSE
1510 registers into two 64-bit halves. This knob promotes all store destinations
1511 to 128 bits to allow register renaming on 128-bit SSE units, but usually
1512 results in one extra micro-op on 64-bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over a 20% SPECfp regression,
1514 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
1589 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1590 HImode and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1593 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1594 vector path on AMD machines. */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1597 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is vector path on AMD
machines. */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1601 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
than via a mov. */
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1606 but one byte longer. */
1609 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1610 operand that cannot be represented using a modRM byte. The XOR
1611 replacement is long decoded, so this split helps here as well. */
1614 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
from FP to FP. */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1628 will impact LEA instruction selection. */
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
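/* Usage sketch: a mask like the one above is tested against the single bit
   for the selected architecture, e.g. later in override_options:

     if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
       target_flags &= ~MASK_NO_FANCY_MATH_387;  */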
1662 static enum stringop_alg stringop_alg = no_stringop;
1664 /* In case the average insn count for single function invocation is
1665 lower than this constant, emit fast (but longer) prologue and
epilogue code. */
1667 #define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP registers */
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers */
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers */
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers */
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
1706 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1714 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1717 /* The "default" register map used in 64bit mode. */
1719 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1726 8,9,10,11,12,13,14,15, /* extended integer registers */
1727 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to
1759 understand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
1773 register numbers. Note that these are all stack-top-relative
numbers.
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15)
1784 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1792 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
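/* Worked lookup, using the numbering described above: %ebp is gcc regno 6
   and svr4_dbx_register_map[6] == 5, matching "5 for %ebp"; likewise %esp
   (gcc regno 7) maps to DWARF regno 4.  */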
1795 /* Test and compare insns in i386.md store the information needed to
1796 generate branch and scc insns here. */
1798 rtx ix86_compare_op0 = NULL_RTX;
1799 rtx ix86_compare_op1 = NULL_RTX;
1801 /* Define parameter passing and return registers. */
1803 static int const x86_64_int_parameter_registers[6] =
1805 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1808 static int const x86_64_ms_abi_int_parameter_registers[4] =
1810 CX_REG, DX_REG, R8_REG, R9_REG
1813 static int const x86_64_int_return_registers[4] =
1815 AX_REG, DX_REG, DI_REG, SI_REG
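/* Example (SysV AMD64 convention, per the ordering above): for a call
   f (a, b) with two integer arguments, a is passed in %rdi (DI_REG) and
   b in %rsi (SI_REG); an integer result comes back in %rax (AX_REG),
   with %rdx (DX_REG) carrying the second half of a 16-byte return.  */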
1818 /* Define the structure for the machine field in struct function. */
1820 struct GTY(()) stack_local_entry {
1821 unsigned short mode;
1824 struct stack_local_entry *next;
1827 /* Structure describing stack frame layout.
1828 Stack grows downward:

   saved frame pointer if frame_pointer_needed
                                          <- HARD_FRAME_POINTER
   ...
   [va_arg registers]  \
                        > to_allocate     <- FRAME_POINTER
   ...                 /
*/
struct ix86_frame
{
1858 HOST_WIDE_INT frame;
1860 int outgoing_arguments_size;
1862 HOST_WIDE_INT to_allocate;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1868 /* When save_regs_using_mov is set, emit prologue using
1869 move instead of push instructions. */
1870 bool save_regs_using_mov;
};
1873 /* Code model option. */
1874 enum cmodel ix86_cmodel;
1876 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1878 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1880 /* Which unit we are generating floating point math for. */
1881 enum fpmath_unit ix86_fpmath;
1883 /* Which CPU we are scheduling for. */
1884 enum attr_cpu ix86_schedule;
1886 /* Which CPU we are optimizing for. */
1887 enum processor_type ix86_tune;
1889 /* Which instruction set architecture to use. */
1890 enum processor_type ix86_arch;
1892 /* True if the SSE prefetch instruction is not a NOP. */
1893 int x86_prefetch_sse;
1895 /* ix86_regparm_string as a number */
1896 static int ix86_regparm;
1898 /* -mstackrealign option */
1899 extern int ix86_force_align_arg_pointer;
1900 static const char ix86_force_align_arg_pointer_string[]
1901 = "force_align_arg_pointer";
1903 static rtx (*ix86_gen_leave) (void);
1904 static rtx (*ix86_gen_pop1) (rtx);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits, as specified on
the command line. */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing the given 64-bit part of the argument.
1951 These represent classes as documented by the PS ABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1953 uses an SFmode or DFmode move instead of DImode to avoid reformatting
penalties.
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (when the upper half contains only padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
1974 /* Table of constants used by fldpi, fldln2, etc. */
1975 static REAL_VALUE_TYPE ext_80387_constants_table[5];
1976 static bool ext_80387_constants_init = false;
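/* The five entries presumably correspond to the x87 load-constant insns
   beyond fldz/fld1 -- fldpi, fldln2, fldlg2, fldl2e and fldl2t (pi, ln 2,
   log10 2, log2 e and log2 10) -- with the table filled lazily on first
   use, as guarded by ext_80387_constants_init.  */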
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1991 enum ix86_function_specific_strings
{
1993 IX86_FUNCTION_SPECIFIC_ARCH,
1994 IX86_FUNCTION_SPECIFIC_TUNE,
1995 IX86_FUNCTION_SPECIFIC_FPMATH,
1996 IX86_FUNCTION_SPECIFIC_MAX
};
1999 static char *ix86_target_string (int, int, const char *, const char *,
2000 const char *, bool);
2001 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2002 static void ix86_function_specific_save (struct cl_target_option *);
2003 static void ix86_function_specific_restore (struct cl_target_option *);
2004 static void ix86_function_specific_print (FILE *, int,
2005 struct cl_target_option *);
2006 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2007 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2008 static bool ix86_can_inline_p (tree, tree);
2009 static void ix86_set_current_function (tree);
2010 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2012 static enum calling_abi ix86_function_abi (const_tree);
2015 #ifndef SUBTARGET32_DEFAULT_CPU
2016 #define SUBTARGET32_DEFAULT_CPU "i386"
#endif
2019 /* The svr4 ABI for the i386 says that records and unions are returned
in memory. */
2021 #ifndef DEFAULT_PCC_STRUCT_RETURN
2022 #define DEFAULT_PCC_STRUCT_RETURN 1
#endif
2025 /* Whether -mtune= or -march= was specified. */
2026 static int ix86_tune_defaulted;
2027 static int ix86_arch_specified;
2029 /* Bit flags that specify the ISA we are compiling for. */
2030 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET OPTION_MASK_ISA_LWP
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
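/* Worked example of the SET chains above: OPTION_MASK_ISA_SSE4_2_SET
   expands through SSE4_1_SET, SSSE3_SET, SSE3_SET and SSE2_SET down to
   OPTION_MASK_ISA_SSE, so a single

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   (as done for -msse4.2 in ix86_handle_option below) enables the whole
   prerequisite chain at once.  */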
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET)
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
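/* The UNSET chains run the dependency graph the other way:
   OPTION_MASK_ISA_SSE3_UNSET pulls in SSSE3_UNSET (and from there SSE4.1,
   SSE4.2, AVX, ...) as well as SSE4A_UNSET, so

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;

   for -mno-sse3 also disables every ISA that depends on SSE3.  */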
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number. */
struct ptt
{
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
};
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
{
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
};
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Implement TARGET_HANDLE_OPTION. */
static bool
2213 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2220 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2233 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2249 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2250 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2254 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2267 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2275 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2280 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2288 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2293 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2306 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2314 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2319 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2327 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2332 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2333 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2340 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2345 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2346 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2351 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2352 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2356 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2357 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2363 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2364 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2368 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2369 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2376 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2381 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2382 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2389 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2394 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2395 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2402 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2407 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2408 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2415 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2420 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2421 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2428 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2433 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2434 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2441 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2446 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2447 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2454 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2459 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2460 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2467 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2472 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2473 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2480 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2485 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2486 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2493 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2498 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2499 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2506 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2511 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2512 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2519 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2524 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2525 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2532 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2537 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2538 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2545 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2550 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2551 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
2560 /* Return a string that documents the current -m options. The caller is
2561 responsible for freeing the string. */
static char *
2564 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2565 const char *fpmath, bool add_nl_p)
2567 struct ix86_target_opts
2569 const char *option; /* option string */
2570 int mask; /* isa mask options */
2573 /* This table is ordered so that options like -msse4.2 that imply
2574 preceding options are matched first. */
2575 static struct ix86_target_opts isa_opts[] =
2577 { "-m64", OPTION_MASK_ISA_64BIT },
2578 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2579 { "-mfma", OPTION_MASK_ISA_FMA },
2580 { "-mxop", OPTION_MASK_ISA_XOP },
2581 { "-mlwp", OPTION_MASK_ISA_LWP },
2582 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2583 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2584 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2585 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2586 { "-msse3", OPTION_MASK_ISA_SSE3 },
2587 { "-msse2", OPTION_MASK_ISA_SSE2 },
2588 { "-msse", OPTION_MASK_ISA_SSE },
2589 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2590 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2591 { "-mmmx", OPTION_MASK_ISA_MMX },
2592 { "-mabm", OPTION_MASK_ISA_ABM },
2593 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2594 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2595 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2596 { "-maes", OPTION_MASK_ISA_AES },
2597 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2598 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2599 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2600 { "-mf16c", OPTION_MASK_ISA_F16C },
2604 static struct ix86_target_opts flag_opts[] =
2606 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2607 { "-m80387", MASK_80387 },
2608 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2609 { "-malign-double", MASK_ALIGN_DOUBLE },
2610 { "-mcld", MASK_CLD },
2611 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2612 { "-mieee-fp", MASK_IEEE_FP },
2613 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2614 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2615 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2616 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2617 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2618 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2619 { "-mno-red-zone", MASK_NO_RED_ZONE },
2620 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2621 { "-mrecip", MASK_RECIP },
2622 { "-mrtd", MASK_RTD },
2623 { "-msseregparm", MASK_SSEREGPARM },
2624 { "-mstack-arg-probe", MASK_STACK_PROBE },
2625 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2628 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2631 char target_other[40];
2640 memset (opts, '\0', sizeof (opts));
2642 /* Add -march= option. */
2645 opts[num][0] = "-march=";
2646 opts[num++][1] = arch;
2649 /* Add -mtune= option. */
2652 opts[num][0] = "-mtune=";
2653 opts[num++][1] = tune;
2656 /* Pick out the ISA options. */
2657 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2659 if ((isa & isa_opts[i].mask) != 0)
2661 opts[num++][0] = isa_opts[i].option;
2662 isa &= ~ isa_opts[i].mask;
2666 if (isa && add_nl_p)
2668 opts[num++][0] = isa_other;
2669 sprintf (isa_other, "(other isa: %#x)", isa);
2672 /* Add flag options. */
2673 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2675 if ((flags & flag_opts[i].mask) != 0)
2677 opts[num++][0] = flag_opts[i].option;
2678 flags &= ~ flag_opts[i].mask;
2682 if (flags && add_nl_p)
2684 opts[num++][0] = target_other;
2685 sprintf (target_other, "(other flags: %#x)", flags);
2688 /* Add -fpmath= option. */
2691 opts[num][0] = "-mfpmath=";
2692 opts[num++][1] = fpmath;
2699 gcc_assert (num < ARRAY_SIZE (opts));
2701 /* Size the string. */
2703 sep_len = (add_nl_p) ? 3 : 1;
2704 for (i = 0; i < num; i++)
2707 for (j = 0; j < 2; j++)
2709 len += strlen (opts[i][j]);
2712 /* Build the string. */
2713 ret = ptr = (char *) xmalloc (len);
2716 for (i = 0; i < num; i++)
2720 for (j = 0; j < 2; j++)
2721 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2728 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2736 for (j = 0; j < 2; j++)
2739 memcpy (ptr, opts[i][j], len2[j]);
2741 line_len += len2[j];
2746 gcc_assert (ret + len >= ptr);
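/* Usage sketch (hypothetical argument values): given the ordering above
   (-march, -mtune, ISA options, flag options, -mfpmath),

     char *s = ix86_target_string (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE,
                                   MASK_80387, "i686", "generic32",
                                   "sse", false);

   should yield something like
   "-march=i686 -mtune=generic32 -msse2 -msse -m80387 -mfpmath=sse";
   the caller must free the result.  */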
2751 /* Return TRUE if software prefetching is beneficial for the
given CPU. */
static bool
2755 software_prefetching_beneficial_p (void)
2759 case PROCESSOR_GEODE:
2761 case PROCESSOR_ATHLON:
2763 case PROCESSOR_AMDFAM10:
2771 /* Function that is callable from the debugger to print the current
options. */
static void
2774 ix86_debug_options (void)
2776 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2777 ix86_arch_string, ix86_tune_string,
2778 ix86_fpmath_string, true);
2782 fprintf (stderr, "%s\n\n", opts);
2786 fputs ("<no options>\n\n", stderr);
2791 /* Sometimes certain combinations of command options do not make
2792 sense on a particular target machine. You can define a macro
2793 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2794 defined, is executed once just after all the command options have
been scanned.
2797 Don't use this macro to turn on various extra optimizations for
2798 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
void
2801 override_options (bool main_args_p)
2804 unsigned int ix86_arch_mask, ix86_tune_mask;
2805 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2810 /* Comes from final.c -- no real reason to change it. */
2811 #define MAX_CODE_ALIGN 16
2819 PTA_PREFETCH_SSE = 1 << 4,
2821 PTA_3DNOW_A = 1 << 6,
2825 PTA_POPCNT = 1 << 10,
2827 PTA_SSE4A = 1 << 12,
2828 PTA_NO_SAHF = 1 << 13,
2829 PTA_SSE4_1 = 1 << 14,
2830 PTA_SSE4_2 = 1 << 15,
2832 PTA_PCLMUL = 1 << 17,
2835 PTA_MOVBE = 1 << 20,
2839 PTA_FSGSBASE = 1 << 24,
2840 PTA_RDRND = 1 << 25,
static struct pta
{
2846 const char *const name; /* processor name or nickname. */
2847 const enum processor_type processor;
2848 const enum attr_cpu schedule;
2849 const unsigned /*enum pta_flags*/ flags;
}
2851 const processor_alias_table[] =
{
2853 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2854 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2855 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2856 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2857 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2858 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2859 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2860 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2861 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2862 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2863 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2864 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2865 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2867 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2869 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2870 PTA_MMX | PTA_SSE | PTA_SSE2},
2871 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2872 PTA_MMX | PTA_SSE | PTA_SSE2},
2873 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2874 PTA_MMX | PTA_SSE | PTA_SSE2},
2875 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2876 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2877 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2878 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2879 | PTA_CX16 | PTA_NO_SAHF},
2880 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2881 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2882 | PTA_SSSE3 | PTA_CX16},
2883 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2884 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2885 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2886 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2887 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2888 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2889 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2890 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2891 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2892 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2893 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2894 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2895 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2896 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2897 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2898 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2899 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2900 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2901 {"x86-64", PROCESSOR_K8, CPU_K8,
2902 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2903 {"k8", PROCESSOR_K8, CPU_K8,
2904 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2905 | PTA_SSE2 | PTA_NO_SAHF},
2906 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2907 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2908 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2909 {"opteron", PROCESSOR_K8, CPU_K8,
2910 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2911 | PTA_SSE2 | PTA_NO_SAHF},
2912 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2914 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2915 {"athlon64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2927 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2930 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2933 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2934 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2935 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2936 0 /* flags are only used for -march switch. */ },
2937 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2938 PTA_64BIT /* flags are only used for -march switch. */ },
};
2941 int const pta_size = ARRAY_SIZE (processor_alias_table);
2943 /* Set up prefix/suffix so the error messages refer to either the command
2944 line argument, or the attribute(target). */
2953 prefix = "option(\"";
2958 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2959 SUBTARGET_OVERRIDE_OPTIONS;
2962 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2963 SUBSUBTARGET_OVERRIDE_OPTIONS;
2966 /* -fPIC is the default for x86_64. */
2967 if (TARGET_MACHO && TARGET_64BIT)
2970 /* Set the default values for switches whose default depends on TARGET_64BIT
2971 in case they weren't overwritten by command line options. */
2976 /* Mach-O doesn't support omitting the frame pointer for now. */
2977 if (flag_omit_frame_pointer == 2)
2978 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2979 if (flag_asynchronous_unwind_tables == 2)
2980 flag_asynchronous_unwind_tables = 1;
2981 if (flag_pcc_struct_return == 2)
2982 flag_pcc_struct_return = 0;
2988 if (flag_omit_frame_pointer == 2)
2989 flag_omit_frame_pointer = 0;
2990 if (flag_asynchronous_unwind_tables == 2)
2991 flag_asynchronous_unwind_tables = 0;
2992 if (flag_pcc_struct_return == 2)
2993 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2996 /* Need to check -mtune=generic first. */
2997 if (ix86_tune_string)
2999 if (!strcmp (ix86_tune_string, "generic")
3000 || !strcmp (ix86_tune_string, "i686")
3001 /* As special support for cross compilers we read -mtune=native
3002 as -mtune=generic. With native compilers we won't see the
3003 -mtune=native, as it was changed by the driver. */
3004 || !strcmp (ix86_tune_string, "native"))
3007 ix86_tune_string = "generic64";
3009 ix86_tune_string = "generic32";
3011 /* If this call is for setting the option attribute, allow the
3012 generic32/generic64 that was previously set. */
3013 else if (!main_args_p
3014 && (!strcmp (ix86_tune_string, "generic32")
3015 || !strcmp (ix86_tune_string, "generic64")))
;
3017 else if (!strncmp (ix86_tune_string, "generic", 7))
3018 error ("bad value (%s) for %stune=%s %s",
3019 ix86_tune_string, prefix, suffix, sw);
3020 else if (!strcmp (ix86_tune_string, "x86-64"))
3021 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3022 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3023 prefix, suffix, prefix, suffix, prefix, suffix);
3027 if (ix86_arch_string)
3028 ix86_tune_string = ix86_arch_string;
3029 if (!ix86_tune_string)
3031 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3032 ix86_tune_defaulted = 1;
3035 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3036 need to use a sensible tune option. */
3037 if (!strcmp (ix86_tune_string, "generic")
3038 || !strcmp (ix86_tune_string, "x86-64")
3039 || !strcmp (ix86_tune_string, "i686"))
3042 ix86_tune_string = "generic64";
3044 ix86_tune_string = "generic32";
3048 if (ix86_stringop_string)
3050 if (!strcmp (ix86_stringop_string, "rep_byte"))
3051 stringop_alg = rep_prefix_1_byte;
3052 else if (!strcmp (ix86_stringop_string, "libcall"))
3053 stringop_alg = libcall;
3054 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3055 stringop_alg = rep_prefix_4_byte;
3056 else if (!strcmp (ix86_stringop_string, "rep_8byte")
&& TARGET_64BIT)
3058 /* rep; movq isn't available in 32-bit code. */
3059 stringop_alg = rep_prefix_8_byte;
3060 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3061 stringop_alg = loop_1_byte;
3062 else if (!strcmp (ix86_stringop_string, "loop"))
3063 stringop_alg = loop;
3064 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3065 stringop_alg = unrolled_loop;
3067 error ("bad value (%s) for %sstringop-strategy=%s %s",
3068 ix86_stringop_string, prefix, suffix, sw);
3071 if (!ix86_arch_string)
3072 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3074 ix86_arch_specified = 1;
3076 /* Validate -mabi= value. */
3077 if (ix86_abi_string)
3079 if (strcmp (ix86_abi_string, "sysv") == 0)
3080 ix86_abi = SYSV_ABI;
3081 else if (strcmp (ix86_abi_string, "ms") == 0)
3084 error ("unknown ABI (%s) for %sabi=%s %s",
3085 ix86_abi_string, prefix, suffix, sw);
3088 ix86_abi = DEFAULT_ABI;
3090 if (ix86_cmodel_string != 0)
3092 if (!strcmp (ix86_cmodel_string, "small"))
3093 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3094 else if (!strcmp (ix86_cmodel_string, "medium"))
3095 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3096 else if (!strcmp (ix86_cmodel_string, "large"))
3097 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3099 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3100 else if (!strcmp (ix86_cmodel_string, "32"))
3101 ix86_cmodel = CM_32;
3102 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3103 ix86_cmodel = CM_KERNEL;
3105 error ("bad value (%s) for %scmodel=%s %s",
3106 ix86_cmodel_string, prefix, suffix, sw);
3110 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3111 use of rip-relative addressing. This eliminates fixups that
3112 would otherwise be needed if this object is to be placed in a
3113 DLL, and is essentially just as efficient as direct addressing. */
3114 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3115 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3116 else if (TARGET_64BIT)
3117 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3119 ix86_cmodel = CM_32;
3121 if (ix86_asm_string != 0)
3124 && !strcmp (ix86_asm_string, "intel"))
3125 ix86_asm_dialect = ASM_INTEL;
3126 else if (!strcmp (ix86_asm_string, "att"))
3127 ix86_asm_dialect = ASM_ATT;
3129 error ("bad value (%s) for %sasm=%s %s",
3130 ix86_asm_string, prefix, suffix, sw);
3132 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3133 error ("code model %qs not supported in the %s bit mode",
3134 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3135 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3136 sorry ("%i-bit mode not compiled in",
3137 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3139 for (i = 0; i < pta_size; i++)
3140 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3142 ix86_schedule = processor_alias_table[i].schedule;
3143 ix86_arch = processor_alias_table[i].processor;
3144 /* Default cpu tuning to the architecture. */
3145 ix86_tune = ix86_arch;
3147 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3148 error ("CPU you selected does not support x86-64 "
3151 if (processor_alias_table[i].flags & PTA_MMX
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3153 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3154 if (processor_alias_table[i].flags & PTA_3DNOW
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3156 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3157 if (processor_alias_table[i].flags & PTA_3DNOW_A
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3159 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3160 if (processor_alias_table[i].flags & PTA_SSE
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3163 if (processor_alias_table[i].flags & PTA_SSE2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3166 if (processor_alias_table[i].flags & PTA_SSE3
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3168 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3169 if (processor_alias_table[i].flags & PTA_SSSE3
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3172 if (processor_alias_table[i].flags & PTA_SSE4_1
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3175 if (processor_alias_table[i].flags & PTA_SSE4_2
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3177 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3178 if (processor_alias_table[i].flags & PTA_AVX
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3180 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3181 if (processor_alias_table[i].flags & PTA_FMA
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3183 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3184 if (processor_alias_table[i].flags & PTA_SSE4A
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3186 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3187 if (processor_alias_table[i].flags & PTA_FMA4
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3189 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3190 if (processor_alias_table[i].flags & PTA_XOP
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3192 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3193 if (processor_alias_table[i].flags & PTA_LWP
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3195 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3196 if (processor_alias_table[i].flags & PTA_ABM
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3198 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3199 if (processor_alias_table[i].flags & PTA_CX16
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3201 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3202 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3204 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3205 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3207 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3208 if (processor_alias_table[i].flags & PTA_MOVBE
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3210 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3211 if (processor_alias_table[i].flags & PTA_AES
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3213 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3214 if (processor_alias_table[i].flags & PTA_PCLMUL
3215 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3216 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3217 if (processor_alias_table[i].flags & PTA_FSGSBASE
3218 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3219 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3220 if (processor_alias_table[i].flags & PTA_RDRND
3221 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3222 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3223 if (processor_alias_table[i].flags & PTA_F16C
3224 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3225 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3226 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3227 x86_prefetch_sse = true;
3232 if (!strcmp (ix86_arch_string, "generic"))
3233 error ("generic CPU can be used only for %stune=%s %s",
3234 prefix, suffix, sw);
3235 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3236 error ("bad value (%s) for %sarch=%s %s",
3237 ix86_arch_string, prefix, suffix, sw);
3239 ix86_arch_mask = 1u << ix86_arch;
3240 for (i = 0; i < X86_ARCH_LAST; ++i)
3241 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3243 for (i = 0; i < pta_size; i++)
3244 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3246 ix86_schedule = processor_alias_table[i].schedule;
3247 ix86_tune = processor_alias_table[i].processor;
3248 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3250 if (ix86_tune_defaulted)
3252 ix86_tune_string = "x86-64";
3253 for (i = 0; i < pta_size; i++)
3254 if (! strcmp (ix86_tune_string,
3255 processor_alias_table[i].name))
3257 ix86_schedule = processor_alias_table[i].schedule;
3258 ix86_tune = processor_alias_table[i].processor;
3261 error ("CPU you selected does not support x86-64 "
3264 /* Intel CPUs have always interpreted SSE prefetch instructions as
3265 NOPs; so, we can enable SSE prefetch instructions even when
3266 -mtune (rather than -march) points us to a processor that has them.
3267 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3268 higher processors. */
3270 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3271 x86_prefetch_sse = true;
3275 if (ix86_tune_specified && i == pta_size)
3276 error ("bad value (%s) for %stune=%s %s",
3277 ix86_tune_string, prefix, suffix, sw);
3279 ix86_tune_mask = 1u << ix86_tune;
3280 for (i = 0; i < X86_TUNE_LAST; ++i)
3281 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3284 ix86_cost = &ix86_size_cost;
3286 ix86_cost = processor_target_table[ix86_tune].cost;
3288 /* Arrange to set up i386_stack_locals for all functions. */
3289 init_machine_status = ix86_init_machine_status;
3291 /* Validate -mregparm= value. */
3292 if (ix86_regparm_string)
3295 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3296 i = atoi (ix86_regparm_string);
3297 if (i < 0 || i > REGPARM_MAX)
3298 error ("%sregparm=%d%s is not between 0 and %d",
3299 prefix, i, suffix, REGPARM_MAX);
3304 ix86_regparm = REGPARM_MAX;
3306 /* If the user has provided any of the -malign-* options,
3307 warn and use that value only if -falign-* is not set.
3308 Remove this code in GCC 3.2 or later. */
3309 if (ix86_align_loops_string)
3311 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3312 prefix, suffix, suffix);
3313 if (align_loops == 0)
3315 i = atoi (ix86_align_loops_string);
3316 if (i < 0 || i > MAX_CODE_ALIGN)
3317 error ("%salign-loops=%d%s is not between 0 and %d",
3318 prefix, i, suffix, MAX_CODE_ALIGN);
3320 align_loops = 1 << i;
3324 if (ix86_align_jumps_string)
3326 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3327 prefix, suffix, suffix);
3328 if (align_jumps == 0)
3330 i = atoi (ix86_align_jumps_string);
3331 if (i < 0 || i > MAX_CODE_ALIGN)
3332 error ("%salign-loops=%d%s is not between 0 and %d",
3333 prefix, i, suffix, MAX_CODE_ALIGN);
3335 align_jumps = 1 << i;
3339 if (ix86_align_funcs_string)
3341 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3342 prefix, suffix, suffix);
3343 if (align_functions == 0)
3345 i = atoi (ix86_align_funcs_string);
3346 if (i < 0 || i > MAX_CODE_ALIGN)
3347 error ("%salign-loops=%d%s is not between 0 and %d",
3348 prefix, i, suffix, MAX_CODE_ALIGN);
3350 align_functions = 1 << i;
3354 /* Default align_* from the processor table. */
3355 if (align_loops == 0)
3357 align_loops = processor_target_table[ix86_tune].align_loop;
3358 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3360 if (align_jumps == 0)
3362 align_jumps = processor_target_table[ix86_tune].align_jump;
3363 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3365 if (align_functions == 0)
3367 align_functions = processor_target_table[ix86_tune].align_func;
3370 /* Validate -mbranch-cost= value, or provide default. */
3371 ix86_branch_cost = ix86_cost->branch_cost;
3372 if (ix86_branch_cost_string)
3374 i = atoi (ix86_branch_cost_string);
3376 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3378 ix86_branch_cost = i;
3380 if (ix86_section_threshold_string)
3382 i = atoi (ix86_section_threshold_string);
3384 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3386 ix86_section_threshold = i;
3389 if (ix86_tls_dialect_string)
3391 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3392 ix86_tls_dialect = TLS_DIALECT_GNU;
3393 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3394 ix86_tls_dialect = TLS_DIALECT_GNU2;
3396 error ("bad value (%s) for %stls-dialect=%s %s",
3397 ix86_tls_dialect_string, prefix, suffix, sw);
3400 if (ix87_precision_string)
3402 i = atoi (ix87_precision_string);
3403 if (i != 32 && i != 64 && i != 80)
3404 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3409 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3411 /* Enable by default the SSE and MMX builtins. Do allow the user to
3412 explicitly disable any of these. In particular, disabling SSE and
3413 MMX for kernel code is extremely useful. */
3414 if (!ix86_arch_specified)
3416 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3417 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3420 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3424 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3426 if (!ix86_arch_specified)
3428 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3430 /* The i386 ABI does not specify a red zone. It still makes sense to use one
3431 when the programmer takes care to keep the stack from being destroyed. */
3432 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3433 target_flags |= MASK_NO_RED_ZONE;
3436 /* Keep nonleaf frame pointers. */
3437 if (flag_omit_frame_pointer)
3438 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3439 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3440 flag_omit_frame_pointer = 1;
3442 /* If we're doing fast math, we don't care about comparison order
3443 wrt NaNs. This lets us use a shorter comparison sequence. */
3444 if (flag_finite_math_only)
3445 target_flags &= ~MASK_IEEE_FP;
3447 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3448 since the insns won't need emulation. */
3449 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3450 target_flags &= ~MASK_NO_FANCY_MATH_387;
3452 /* Likewise, if the target doesn't have a 387, or we've specified
3453 software floating point, don't use 387 inline intrinsics. */
3455 target_flags |= MASK_NO_FANCY_MATH_387;
3457 /* Turn on MMX builtins for -msse. */
3460 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3461 x86_prefetch_sse = true;
3464 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3465 if (TARGET_SSE4_2 || TARGET_ABM)
3466 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3468 /* Validate -mpreferred-stack-boundary= value or default it to
3469 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3470 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3471 if (ix86_preferred_stack_boundary_string)
3473 i = atoi (ix86_preferred_stack_boundary_string);
3474 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3475 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3476 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3478 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
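/* Illustrative sketch (not part of GCC): the boundary option is also a
   logarithm, but the stored value is in bits.  Assuming BITS_PER_UNIT
   is 8, -mpreferred-stack-boundary=4 selects a 16-byte (128-bit)
   boundary:  */
#if 0
#include <assert.h>
int
main (void)
{
  int i = 4;                          /* from the option string */
  unsigned boundary = (1u << i) * 8;  /* (1 << i) * BITS_PER_UNIT */
  assert (boundary == 128);           /* 16 bytes expressed in bits */
  return 0;
}
#endif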
3481 /* Set the default value for -mstackrealign. */
3482 if (ix86_force_align_arg_pointer == -1)
3483 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3485 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3487 /* Validate -mincoming-stack-boundary= value or default it to
3488 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3489 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3490 if (ix86_incoming_stack_boundary_string)
3492 i = atoi (ix86_incoming_stack_boundary_string);
3493 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3494 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3495 i, TARGET_64BIT ? 4 : 2);
3498 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3499 ix86_incoming_stack_boundary
3500 = ix86_user_incoming_stack_boundary;
3504 /* Accept -msseregparm only if at least SSE support is enabled. */
3505 if (TARGET_SSEREGPARM
3507 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3509 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3510 if (ix86_fpmath_string != 0)
3512 if (! strcmp (ix86_fpmath_string, "387"))
3513 ix86_fpmath = FPMATH_387;
3514 else if (! strcmp (ix86_fpmath_string, "sse"))
3518 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3519 ix86_fpmath = FPMATH_387;
3522 ix86_fpmath = FPMATH_SSE;
3524 else if (! strcmp (ix86_fpmath_string, "387,sse")
3525 || ! strcmp (ix86_fpmath_string, "387+sse")
3526 || ! strcmp (ix86_fpmath_string, "sse,387")
3527 || ! strcmp (ix86_fpmath_string, "sse+387")
3528 || ! strcmp (ix86_fpmath_string, "both"))
3532 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3533 ix86_fpmath = FPMATH_387;
3535 else if (!TARGET_80387)
3537 warning (0, "387 instruction set disabled, using SSE arithmetic");
3538 ix86_fpmath = FPMATH_SSE;
3541 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3544 error ("bad value (%s) for %sfpmath=%s %s",
3545 ix86_fpmath_string, prefix, suffix, sw);
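/* Illustrative sketch (not part of GCC; the enum bit values here are
   assumptions): the "387,sse" spellings select the union of both units,
   which only works because the fpmath_unit values are distinct bits
   that can be ORed together:  */
#if 0
enum fpmath_unit { FPMATH_387 = 1, FPMATH_SSE = 2 };
int
main (void)
{
  enum fpmath_unit fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
  /* Both unit tests succeed on the combined value.  */
  return !((fpmath & FPMATH_387) && (fpmath & FPMATH_SSE));
}
#endif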
3548 /* If the i387 is disabled, then do not return values in it. */
3550 target_flags &= ~MASK_FLOAT_RETURNS;
3552 /* Use an external vectorized library for vectorizing intrinsics. */
3553 if (ix86_veclibabi_string)
3555 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3556 ix86_veclib_handler = ix86_veclibabi_svml;
3557 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3558 ix86_veclib_handler = ix86_veclibabi_acml;
3560 error ("unknown vectorization library ABI type (%s) for "
3561 "%sveclibabi=%s %s", ix86_veclibabi_string,
3562 prefix, suffix, sw);
3565 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3566 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3568 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3570 /* ??? Unwind info is not correct around the CFG unless either a frame
3571 pointer is present or M_A_O_A is set.  Fixing this requires rewriting
3572 unwind info generation to be aware of the CFG and propagating states
3573 around edges. */
3574 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3575 || flag_exceptions || flag_non_call_exceptions)
3576 && flag_omit_frame_pointer
3577 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3579 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3580 warning (0, "unwind tables currently require either a frame pointer "
3581 "or %saccumulate-outgoing-args%s for correctness",
3583 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3586 /* If stack probes are required, the space used for large function
3587 arguments on the stack must also be probed, so enable
3588 -maccumulate-outgoing-args so this happens in the prologue. */
3589 if (TARGET_STACK_PROBE
3590 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3592 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3593 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3594 "for correctness", prefix, suffix);
3595 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3598 /* For sane SSE instruction set generation we need the fcomi instruction.
3599 It is safe to enable all CMOVE instructions. */
3603 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3606 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3607 p = strchr (internal_label_prefix, 'X');
3608 internal_label_prefix_len = p - internal_label_prefix;
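/* Illustrative sketch (not part of GCC): assuming an ELF-style
   ASM_GENERATE_INTERNAL_LABEL that expands to something like
   sprintf (buf, "*.%s%u", prefix, num), generating a label for "LX" and
   searching for the 'X' recovers exactly what the macro prepends:  */
#if 0
#include <stdio.h>
#include <string.h>
int
main (void)
{
  char buf[32];
  char *p;
  sprintf (buf, "*.%s%u", "LX", 0);  /* hypothetical macro expansion */
  p = strchr (buf, 'X');
  printf ("prefix = %.*s, length = %d\n", (int) (p - buf), buf,
          (int) (p - buf));          /* "*.L", 3 */
  return 0;
}
#endif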
3612 /* When a scheduling description is not available, disable the scheduler pass
3613 so it won't slow down compilation and make x87 code slower. */
3614 if (!TARGET_SCHEDULE)
3615 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3617 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3618 set_param_value ("simultaneous-prefetches",
3619 ix86_cost->simultaneous_prefetches);
3620 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3621 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3622 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3623 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3624 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3625 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3627 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3628 if (flag_prefetch_loop_arrays < 0
3631 && software_prefetching_beneficial_p ())
3632 flag_prefetch_loop_arrays = 1;
3634 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3635 can be optimized to ap = __builtin_next_arg (0). */
3637 targetm.expand_builtin_va_start = NULL;
3641 ix86_gen_leave = gen_leave_rex64;
3642 ix86_gen_pop1 = gen_popdi1;
3643 ix86_gen_add3 = gen_adddi3;
3644 ix86_gen_sub3 = gen_subdi3;
3645 ix86_gen_sub3_carry = gen_subdi3_carry;
3646 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3647 ix86_gen_monitor = gen_sse3_monitor64;
3648 ix86_gen_andsp = gen_anddi3;
3649 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3650 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3651 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3655 ix86_gen_leave = gen_leave;
3656 ix86_gen_pop1 = gen_popsi1;
3657 ix86_gen_add3 = gen_addsi3;
3658 ix86_gen_sub3 = gen_subsi3;
3659 ix86_gen_sub3_carry = gen_subsi3_carry;
3660 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3661 ix86_gen_monitor = gen_sse3_monitor;
3662 ix86_gen_andsp = gen_andsi3;
3663 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3664 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3665 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
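/* Illustrative sketch (not part of GCC; names are hypothetical):
   selecting mode-specific generators once, up front, lets the rest of
   the back end call through one set of function pointers instead of
   testing TARGET_64BIT at every emission site:  */
#if 0
typedef int (*gen_fn) (int);
static int gen_add_si (int x) { return x + 1; }  /* stand-in generators */
static int gen_add_di (int x) { return x + 2; }
static gen_fn my_gen_add;
static void
pick_generators (int target_64bit)
{
  my_gen_add = target_64bit ? gen_add_di : gen_add_si;
}
#endif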
3669 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3671 target_flags |= MASK_CLD & ~target_flags_explicit;
3674 /* Save the initial options in case the user does function-specific options. */
3676 target_option_default_node = target_option_current_node
3677 = build_target_option_node ();
3680 /* Update register usage after having seen the compiler flags. */
3683 ix86_conditional_register_usage (void)
3688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3690 if (fixed_regs[i] > 1)
3691 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3692 if (call_used_regs[i] > 1)
3693 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3696 /* The PIC register, if it exists, is fixed. */
3697 j = PIC_OFFSET_TABLE_REGNUM;
3698 if (j != INVALID_REGNUM)
3699 fixed_regs[j] = call_used_regs[j] = 1;
3701 /* The MS_ABI changes the set of call-used registers. */
3702 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3704 call_used_regs[SI_REG] = 0;
3705 call_used_regs[DI_REG] = 0;
3706 call_used_regs[XMM6_REG] = 0;
3707 call_used_regs[XMM7_REG] = 0;
3708 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3709 call_used_regs[i] = 0;
3712 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3713 other call-clobbered regs for 64-bit. */
3716 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3718 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3719 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3720 && call_used_regs[i])
3721 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3724 /* If MMX is disabled, squash the registers. */
3726 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3727 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3728 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3730 /* If SSE is disabled, squash the registers. */
3732 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3733 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3734 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3736 /* If the FPU is disabled, squash the registers. */
3737 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3738 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3739 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3740 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3742 /* If 32-bit, squash the 64-bit registers. */
3745 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3753 /* Save the current options */
3756 ix86_function_specific_save (struct cl_target_option *ptr)
3758 ptr->arch = ix86_arch;
3759 ptr->schedule = ix86_schedule;
3760 ptr->tune = ix86_tune;
3761 ptr->fpmath = ix86_fpmath;
3762 ptr->branch_cost = ix86_branch_cost;
3763 ptr->tune_defaulted = ix86_tune_defaulted;
3764 ptr->arch_specified = ix86_arch_specified;
3765 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3766 ptr->target_flags_explicit = target_flags_explicit;
3768 /* The fields are char but the variables are not; make sure the
3769 values fit in the fields. */
3770 gcc_assert (ptr->arch == ix86_arch);
3771 gcc_assert (ptr->schedule == ix86_schedule);
3772 gcc_assert (ptr->tune == ix86_tune);
3773 gcc_assert (ptr->fpmath == ix86_fpmath);
3774 gcc_assert (ptr->branch_cost == ix86_branch_cost);
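/* Illustrative sketch (not part of GCC): storing a wide value into a
   narrow struct field and asserting that it reads back unchanged is a
   cheap way to catch silent truncation, which is all the gcc_asserts
   above do:  */
#if 0
#include <assert.h>
struct opts { unsigned char branch_cost; };
static void
save_cost (struct opts *ptr, int branch_cost)
{
  ptr->branch_cost = branch_cost;
  assert (ptr->branch_cost == branch_cost);  /* fails if it didn't fit */
}
#endif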
3777 /* Restore the current options */
3780 ix86_function_specific_restore (struct cl_target_option *ptr)
3782 enum processor_type old_tune = ix86_tune;
3783 enum processor_type old_arch = ix86_arch;
3784 unsigned int ix86_arch_mask, ix86_tune_mask;
3787 ix86_arch = (enum processor_type) ptr->arch;
3788 ix86_schedule = (enum attr_cpu) ptr->schedule;
3789 ix86_tune = (enum processor_type) ptr->tune;
3790 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3791 ix86_branch_cost = ptr->branch_cost;
3792 ix86_tune_defaulted = ptr->tune_defaulted;
3793 ix86_arch_specified = ptr->arch_specified;
3794 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3795 target_flags_explicit = ptr->target_flags_explicit;
3797 /* Recreate the arch feature tests if the arch changed */
3798 if (old_arch != ix86_arch)
3800 ix86_arch_mask = 1u << ix86_arch;
3801 for (i = 0; i < X86_ARCH_LAST; ++i)
3802 ix86_arch_features[i]
3803 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3806 /* Recreate the tune optimization tests */
3807 if (old_tune != ix86_tune)
3809 ix86_tune_mask = 1u << ix86_tune;
3810 for (i = 0; i < X86_TUNE_LAST; ++i)
3811 ix86_tune_features[i]
3812 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
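/* Illustrative sketch (not part of GCC; table contents invented): each
   feature-table entry is a bitmask over processors, so testing the bit
   (1u << tune) and normalizing with !! turns the table into a
   per-feature boolean array:  */
#if 0
#define N_FEATURES 2
static const unsigned initial_features[N_FEATURES] = { 0x5, 0x2 };
static unsigned char features[N_FEATURES];
static void
recompute_features (unsigned tune)  /* tune is a processor index */
{
  unsigned mask = 1u << tune;
  unsigned i;
  for (i = 0; i < N_FEATURES; i++)
    features[i] = !!(initial_features[i] & mask);
}
#endif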
3816 /* Print the current options */
3819 ix86_function_specific_print (FILE *file, int indent,
3820 struct cl_target_option *ptr)
3823 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3824 NULL, NULL, NULL, false);
3826 fprintf (file, "%*sarch = %d (%s)\n",
3829 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3830 ? cpu_names[ptr->arch]
3833 fprintf (file, "%*stune = %d (%s)\n",
3836 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3837 ? cpu_names[ptr->tune]
3840 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3841 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3842 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3843 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3847 fprintf (file, "%*s%s\n", indent, "", target_string);
3848 free (target_string);
3853 /* Inner function to process the attribute((target(...))), take an argument and
3854 set the current options from the argument.  If we have a list, recursively go
3855 over the list. */
3858 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3863 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3864 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3865 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3866 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3881 enum ix86_opt_type type;
3886 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3887 IX86_ATTR_ISA ("abm", OPT_mabm),
3888 IX86_ATTR_ISA ("aes", OPT_maes),
3889 IX86_ATTR_ISA ("avx", OPT_mavx),
3890 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3891 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3892 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3893 IX86_ATTR_ISA ("sse", OPT_msse),
3894 IX86_ATTR_ISA ("sse2", OPT_msse2),
3895 IX86_ATTR_ISA ("sse3", OPT_msse3),
3896 IX86_ATTR_ISA ("sse4", OPT_msse4),
3897 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3898 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3899 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3900 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3901 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3902 IX86_ATTR_ISA ("xop", OPT_mxop),
3903 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3904 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3905 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3906 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3908 /* string options */
3909 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3910 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3911 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3914 IX86_ATTR_YES ("cld",
3918 IX86_ATTR_NO ("fancy-math-387",
3919 OPT_mfancy_math_387,
3920 MASK_NO_FANCY_MATH_387),
3922 IX86_ATTR_YES ("ieee-fp",
3926 IX86_ATTR_YES ("inline-all-stringops",
3927 OPT_minline_all_stringops,
3928 MASK_INLINE_ALL_STRINGOPS),
3930 IX86_ATTR_YES ("inline-stringops-dynamically",
3931 OPT_minline_stringops_dynamically,
3932 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3934 IX86_ATTR_NO ("align-stringops",
3935 OPT_mno_align_stringops,
3936 MASK_NO_ALIGN_STRINGOPS),
3938 IX86_ATTR_YES ("recip",
3944 /* If this is a list, recurse to get the options. */
3945 if (TREE_CODE (args) == TREE_LIST)
3949 for (; args; args = TREE_CHAIN (args))
3950 if (TREE_VALUE (args)
3951 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3957 else if (TREE_CODE (args) != STRING_CST)
3960 /* Handle multiple arguments separated by commas. */
3961 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3963 while (next_optstr && *next_optstr != '\0')
3965 char *p = next_optstr;
3967 char *comma = strchr (next_optstr, ',');
3968 const char *opt_string;
3969 size_t len, opt_len;
3974 enum ix86_opt_type type = ix86_opt_unknown;
3980 len = comma - next_optstr;
3981 next_optstr = comma + 1;
3989 /* Recognize no-xxx. */
3990 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3999 /* Find the option. */
4002 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4004 type = attrs[i].type;
4005 opt_len = attrs[i].len;
4006 if (ch == attrs[i].string[0]
4007 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4008 && memcmp (p, attrs[i].string, opt_len) == 0)
4011 mask = attrs[i].mask;
4012 opt_string = attrs[i].string;
4017 /* Process the option. */
4020 error ("attribute(target(\"%s\")) is unknown", orig_p);
4024 else if (type == ix86_opt_isa)
4025 ix86_handle_option (opt, p, opt_set_p);
4027 else if (type == ix86_opt_yes || type == ix86_opt_no)
4029 if (type == ix86_opt_no)
4030 opt_set_p = !opt_set_p;
4033 target_flags |= mask;
4035 target_flags &= ~mask;
4038 else if (type == ix86_opt_str)
4042 error ("option(\"%s\") was already specified", opt_string);
4046 p_strings[opt] = xstrdup (p + opt_len);
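/* Illustrative sketch (not part of GCC): the loop above splits the
   attribute string on commas and peels a leading "no-" to invert the
   sense of the option.  Reduced to its essentials:  */
#if 0
#include <stdbool.h>
#include <string.h>
static void
parse_one (char *p, size_t len)
{
  bool opt_set_p = true;
  if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
    {
      opt_set_p = false;
      p += 3;
      len -= 3;
    }
  /* ... match P against the attrs[] table as the code above does ... */
  (void) opt_set_p;
  (void) len;
}
#endif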
4056 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4059 ix86_valid_target_attribute_tree (tree args)
4061 const char *orig_arch_string = ix86_arch_string;
4062 const char *orig_tune_string = ix86_tune_string;
4063 const char *orig_fpmath_string = ix86_fpmath_string;
4064 int orig_tune_defaulted = ix86_tune_defaulted;
4065 int orig_arch_specified = ix86_arch_specified;
4066 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4069 struct cl_target_option *def
4070 = TREE_TARGET_OPTION (target_option_default_node);
4072 /* Process each of the options on the chain. */
4073 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4076 /* If the changed options are different from the default, rerun override_options,
4077 and then save the options away.  The string options are attribute options,
4078 and will be undone when we copy the save structure. */
4079 if (ix86_isa_flags != def->ix86_isa_flags
4080 || target_flags != def->target_flags
4081 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4082 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4083 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4085 /* If we are using the default tune= or arch=, undo the string assigned,
4086 and use the default. */
4087 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4088 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4089 else if (!orig_arch_specified)
4090 ix86_arch_string = NULL;
4092 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4093 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4094 else if (orig_tune_defaulted)
4095 ix86_tune_string = NULL;
4097 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4098 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4099 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4100 else if (!TARGET_64BIT && TARGET_SSE)
4101 ix86_fpmath_string = "sse,387";
4103 /* Do any overrides, such as arch=xxx or tune=xxx. */
4104 override_options (false);
4106 /* Add any builtin functions for the new ISA, if any. */
4107 ix86_add_new_builtins (ix86_isa_flags);
4109 /* Save the current options unless we are validating options for
4110 #pragma. */
4111 t = build_target_option_node ();
4113 ix86_arch_string = orig_arch_string;
4114 ix86_tune_string = orig_tune_string;
4115 ix86_fpmath_string = orig_fpmath_string;
4117 /* Free up memory allocated to hold the strings */
4118 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4119 if (option_strings[i])
4120 free (option_strings[i]);
4126 /* Hook to validate attribute((target("string"))). */
4129 ix86_valid_target_attribute_p (tree fndecl,
4130 tree ARG_UNUSED (name),
4132 int ARG_UNUSED (flags))
4134 struct cl_target_option cur_target;
4136 tree old_optimize = build_optimization_node ();
4137 tree new_target, new_optimize;
4138 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4140 /* If the function changed the optimization levels as well as setting target
4141 options, start with the optimizations specified. */
4142 if (func_optimize && func_optimize != old_optimize)
4143 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4145 /* The target attributes may also change some optimization flags, so update
4146 the optimization options if necessary. */
4147 cl_target_option_save (&cur_target);
4148 new_target = ix86_valid_target_attribute_tree (args);
4149 new_optimize = build_optimization_node ();
4156 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4158 if (old_optimize != new_optimize)
4159 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4162 cl_target_option_restore (&cur_target);
4164 if (old_optimize != new_optimize)
4165 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4171 /* Hook to determine if one function can safely inline another. */
4174 ix86_can_inline_p (tree caller, tree callee)
4177 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4178 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4180 /* If callee has no option attributes, then it is ok to inline. */
4184 /* If caller has no option attributes but callee does, then it is not ok to
4185 inline. */
4186 else if (!caller_tree)
4191 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4192 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4194 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4195 function can inline an SSE2 function but an SSE2 function can't inline
4196 an SSE4 function. */
4197 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4198 != callee_opts->ix86_isa_flags)
4201 /* See if we have the same non-isa options. */
4202 else if (caller_opts->target_flags != callee_opts->target_flags)
4205 /* See if arch, tune, etc. are the same. */
4206 else if (caller_opts->arch != callee_opts->arch)
4209 else if (caller_opts->tune != callee_opts->tune)
4212 else if (caller_opts->fpmath != callee_opts->fpmath)
4215 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4226 /* Remember the last target of ix86_set_current_function. */
4227 static GTY(()) tree ix86_previous_fndecl;
4229 /* Establish appropriate back-end context for processing the function
4230 FNDECL. The argument might be NULL to indicate processing at top
4231 level, outside of any function scope. */
4233 ix86_set_current_function (tree fndecl)
4235 /* Only change the context if the function changes. This hook is called
4236 several times in the course of compiling a function, and we don't want to
4237 slow things down too much or call target_reinit when it isn't safe. */
4238 if (fndecl && fndecl != ix86_previous_fndecl)
4240 tree old_tree = (ix86_previous_fndecl
4241 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4244 tree new_tree = (fndecl
4245 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4248 ix86_previous_fndecl = fndecl;
4249 if (old_tree == new_tree)
4254 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4260 struct cl_target_option *def
4261 = TREE_TARGET_OPTION (target_option_current_node);
4263 cl_target_option_restore (def);
4270 /* Return true if this goes in large data/bss. */
4273 ix86_in_large_data_p (tree exp)
4275 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4278 /* Functions are never large data. */
4279 if (TREE_CODE (exp) == FUNCTION_DECL)
4282 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4284 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4285 if (strcmp (section, ".ldata") == 0
4286 || strcmp (section, ".lbss") == 0)
4292 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4294 /* If this is an incomplete type with size 0, then we can't put it
4295 in data because it might be too big when completed. */
4296 if (!size || size > ix86_section_threshold)
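/* Illustrative example (hypothetical object names; assuming the default
   -mlarge-data-threshold): under -mcmodel=medium, objects larger than
   the threshold are routed to the large sections while small ones stay
   in the ordinary ones:  */
#if 0
static char big_buffer[100000];  /* above threshold: .lbss/.ldata */
static char small_buffer[64];    /* below threshold: ordinary .bss */
#endif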
4303 /* Switch to the appropriate section for output of DECL.
4304 DECL is either a `VAR_DECL' node or a constant of some sort.
4305 RELOC indicates whether forming the initial value of DECL requires
4306 link-time relocations. */
4308 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4312 x86_64_elf_select_section (tree decl, int reloc,
4313 unsigned HOST_WIDE_INT align)
4315 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4316 && ix86_in_large_data_p (decl))
4318 const char *sname = NULL;
4319 unsigned int flags = SECTION_WRITE;
4320 switch (categorize_decl_for_section (decl, reloc))
4325 case SECCAT_DATA_REL:
4326 sname = ".ldata.rel";
4328 case SECCAT_DATA_REL_LOCAL:
4329 sname = ".ldata.rel.local";
4331 case SECCAT_DATA_REL_RO:
4332 sname = ".ldata.rel.ro";
4334 case SECCAT_DATA_REL_RO_LOCAL:
4335 sname = ".ldata.rel.ro.local";
4339 flags |= SECTION_BSS;
4342 case SECCAT_RODATA_MERGE_STR:
4343 case SECCAT_RODATA_MERGE_STR_INIT:
4344 case SECCAT_RODATA_MERGE_CONST:
4348 case SECCAT_SRODATA:
4355 /* We don't split these for the medium model.  Place them into
4356 default sections and hope for the best. */
4358 case SECCAT_EMUTLS_VAR:
4359 case SECCAT_EMUTLS_TMPL:
4364 /* We might get called with string constants, but get_named_section
4365 doesn't like them as they are not DECLs. Also, we need to set
4366 flags in that case. */
4368 return get_section (sname, flags, NULL);
4369 return get_named_section (decl, sname, reloc);
4372 return default_elf_select_section (decl, reloc, align);
4375 /* Build up a unique section name, expressed as a
4376 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4377 RELOC indicates whether the initial value of EXP requires
4378 link-time relocations. */
4380 static void ATTRIBUTE_UNUSED
4381 x86_64_elf_unique_section (tree decl, int reloc)
4383 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4384 && ix86_in_large_data_p (decl))
4386 const char *prefix = NULL;
4387 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4388 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4390 switch (categorize_decl_for_section (decl, reloc))
4393 case SECCAT_DATA_REL:
4394 case SECCAT_DATA_REL_LOCAL:
4395 case SECCAT_DATA_REL_RO:
4396 case SECCAT_DATA_REL_RO_LOCAL:
4397 prefix = one_only ? ".ld" : ".ldata";
4400 prefix = one_only ? ".lb" : ".lbss";
4403 case SECCAT_RODATA_MERGE_STR:
4404 case SECCAT_RODATA_MERGE_STR_INIT:
4405 case SECCAT_RODATA_MERGE_CONST:
4406 prefix = one_only ? ".lr" : ".lrodata";
4408 case SECCAT_SRODATA:
4415 /* We don't split these for the medium model.  Place them into
4416 default sections and hope for the best. */
4418 case SECCAT_EMUTLS_VAR:
4419 prefix = targetm.emutls.var_section;
4421 case SECCAT_EMUTLS_TMPL:
4422 prefix = targetm.emutls.tmpl_section;
4427 const char *name, *linkonce;
4430 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4431 name = targetm.strip_name_encoding (name);
4433 /* If we're using one_only, then there needs to be a .gnu.linkonce
4434 prefix to the section name. */
4435 linkonce = one_only ? ".gnu.linkonce" : "";
4437 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4439 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4443 default_unique_section (decl, reloc);
4446 #ifdef COMMON_ASM_OP
4447 /* This says how to output assembler code to declare an
4448 uninitialized external linkage data object.
4450 For medium model x86-64 we need to use the .largecomm opcode for
4451 large objects. */
4453 x86_elf_aligned_common (FILE *file,
4454 const char *name, unsigned HOST_WIDE_INT size,
4457 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4458 && size > (unsigned int)ix86_section_threshold)
4459 fputs (".largecomm\t", file);
4461 fputs (COMMON_ASM_OP, file);
4462 assemble_name (file, name);
4463 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4464 size, align / BITS_PER_UNIT);
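/* For example (output derived from the fputs/fprintf calls above, not
   captured from a real build), a 100000-byte common object with 256-bit
   alignment under the medium model would be announced as:

     .largecomm  big_buffer,100000,32   */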
4468 /* Utility function for targets to use in implementing
4469 ASM_OUTPUT_ALIGNED_BSS. */
4472 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4473 const char *name, unsigned HOST_WIDE_INT size,
4476 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4477 && size > (unsigned int)ix86_section_threshold)
4478 switch_to_section (get_named_section (decl, ".lbss", 0));
4480 switch_to_section (bss_section);
4481 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4482 #ifdef ASM_DECLARE_OBJECT_NAME
4483 last_assemble_variable_decl = decl;
4484 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4486 /* The standard thing is to just output a label for the object. */
4487 ASM_OUTPUT_LABEL (file, name);
4488 #endif /* ASM_DECLARE_OBJECT_NAME */
4489 ASM_OUTPUT_SKIP (file, size ? size : 1);
4493 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4495 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
4496 make the problem of having too few registers even worse. */
4497 #ifdef INSN_SCHEDULING
4499 flag_schedule_insns = 0;
4503 /* The Darwin libraries never set errno, so we might as well
4504 avoid calling them when that's the only reason we would. */
4505 flag_errno_math = 0;
4507 /* The default values of these switches depend on TARGET_64BIT, which is
4508 not known at this moment.  Mark these values with 2 and let the user
4509 override them.  In case there is no command line option specifying
4510 them, we will set the defaults in override_options. */
4512 flag_omit_frame_pointer = 2;
4514 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4518 flag_pcc_struct_return = 2;
4519 flag_asynchronous_unwind_tables = 2;
4520 flag_vect_cost_model = 1;
4521 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4522 SUBTARGET_OPTIMIZATION_OPTIONS;
4526 /* Decide whether we must probe the stack before any space allocation
4527 on this target. It's essentially TARGET_STACK_PROBE except when
4528 -fstack-check causes the stack to be already probed differently. */
4531 ix86_target_stack_probe (void)
4533 /* Do not probe the stack twice if static stack checking is enabled. */
4534 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4537 return TARGET_STACK_PROBE;
4540 /* Decide whether we can make a sibling call to a function. DECL is the
4541 declaration of the function being targeted by the call and EXP is the
4542 CALL_EXPR representing the call. */
4545 ix86_function_ok_for_sibcall (tree decl, tree exp)
4547 tree type, decl_or_type;
4550 /* If we are generating position-independent code, we cannot sibcall
4551 optimize any indirect call, or a direct call to a global function,
4552 as the PLT requires %ebx be live. */
4553 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4556 /* If we need to align the outgoing stack, then sibcalling would
4557 unalign the stack, which may break the called function. */
4558 if (ix86_minimum_incoming_stack_boundary (true)
4559 < PREFERRED_STACK_BOUNDARY)
4564 decl_or_type = decl;
4565 type = TREE_TYPE (decl);
4569 /* We're looking at the CALL_EXPR, we need the type of the function. */
4570 type = CALL_EXPR_FN (exp); /* pointer expression */
4571 type = TREE_TYPE (type); /* pointer type */
4572 type = TREE_TYPE (type); /* function type */
4573 decl_or_type = type;
4576 /* Check that the return value locations are the same.  For instance,
4577 if we are returning floats on the 80387 register stack, we cannot
4578 make a sibcall from a function that doesn't return a float to a
4579 function that does or, conversely, from a function that does return
4580 a float to a function that doesn't; the necessary stack adjustment
4581 would not be executed. This is also the place we notice
4582 differences in the return value ABI. Note that it is ok for one
4583 of the functions to have void return type as long as the return
4584 value of the other is passed in a register. */
4585 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4586 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4588 if (STACK_REG_P (a) || STACK_REG_P (b))
4590 if (!rtx_equal_p (a, b))
4593 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4595 else if (!rtx_equal_p (a, b))
4600 /* The SYSV ABI has more call-clobbered registers;
4601 disallow sibcalls from MS to SYSV. */
4602 if (cfun->machine->call_abi == MS_ABI
4603 && ix86_function_type_abi (type) == SYSV_ABI)
4608 /* If this call is indirect, we'll need to be able to use a
4609 call-clobbered register for the address of the target function.
4610 Make sure that all such registers are not used for passing
4611 parameters. Note that DLLIMPORT functions are indirect. */
4613 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4615 if (ix86_function_regparm (type, NULL) >= 3)
4617 /* ??? Need to count the actual number of registers to be used,
4618 not the possible number of registers. Fix later. */
4624 /* Otherwise okay. That also includes certain types of indirect calls. */
4628 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4629 and "sseregparm" calling convention attributes;
4630 arguments as in struct attribute_spec.handler. */
4633 ix86_handle_cconv_attribute (tree *node, tree name,
4635 int flags ATTRIBUTE_UNUSED,
4638 if (TREE_CODE (*node) != FUNCTION_TYPE
4639 && TREE_CODE (*node) != METHOD_TYPE
4640 && TREE_CODE (*node) != FIELD_DECL
4641 && TREE_CODE (*node) != TYPE_DECL)
4643 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4645 *no_add_attrs = true;
4649 /* Can combine regparm with all attributes but fastcall. */
4650 if (is_attribute_p ("regparm", name))
4654 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4656 error ("fastcall and regparm attributes are not compatible");
4659 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4661 error ("regparm and thiscall attributes are not compatible");
4664 cst = TREE_VALUE (args);
4665 if (TREE_CODE (cst) != INTEGER_CST)
4667 warning (OPT_Wattributes,
4668 "%qE attribute requires an integer constant argument",
4670 *no_add_attrs = true;
4672 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4674 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4676 *no_add_attrs = true;
4684 /* Do not warn when emulating the MS ABI. */
4685 if ((TREE_CODE (*node) != FUNCTION_TYPE
4686 && TREE_CODE (*node) != METHOD_TYPE)
4687 || ix86_function_type_abi (*node) != MS_ABI)
4688 warning (OPT_Wattributes, "%qE attribute ignored",
4690 *no_add_attrs = true;
4694 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4695 if (is_attribute_p ("fastcall", name))
4697 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4699 error ("fastcall and cdecl attributes are not compatible");
4701 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4703 error ("fastcall and stdcall attributes are not compatible");
4705 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4707 error ("fastcall and regparm attributes are not compatible");
4709 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4711 error ("fastcall and thiscall attributes are not compatible");
4715 /* Can combine stdcall with fastcall (redundant), regparm and
4716 sseregparm. */
4717 else if (is_attribute_p ("stdcall", name))
4719 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4721 error ("stdcall and cdecl attributes are not compatible");
4723 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4725 error ("stdcall and fastcall attributes are not compatible");
4727 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4729 error ("stdcall and thiscall attributes are not compatible");
4733 /* Can combine cdecl with regparm and sseregparm. */
4734 else if (is_attribute_p ("cdecl", name))
4736 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4738 error ("stdcall and cdecl attributes are not compatible");
4740 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4742 error ("fastcall and cdecl attributes are not compatible");
4744 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4746 error ("cdecl and thiscall attributes are not compatible");
4749 else if (is_attribute_p ("thiscall", name))
4751 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4752 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4754 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4756 error ("stdcall and thiscall attributes are not compatible");
4758 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4760 error ("fastcall and thiscall attributes are not compatible");
4762 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4764 error ("cdecl and thiscall attributes are not compatible");
4768 /* Can combine sseregparm with all attributes. */
4773 /* Return 0 if the attributes for two types are incompatible, 1 if they
4774 are compatible, and 2 if they are nearly compatible (which causes a
4775 warning to be generated). */
4778 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4780 /* Check for mismatch of non-default calling convention. */
4781 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4783 if (TREE_CODE (type1) != FUNCTION_TYPE
4784 && TREE_CODE (type1) != METHOD_TYPE)
4787 /* Check for mismatched fastcall/regparm types. */
4788 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4789 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4790 || (ix86_function_regparm (type1, NULL)
4791 != ix86_function_regparm (type2, NULL)))
4794 /* Check for mismatched sseregparm types. */
4795 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4796 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4799 /* Check for mismatched thiscall types. */
4800 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4801 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4804 /* Check for mismatched return types (cdecl vs stdcall). */
4805 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4806 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4812 /* Return the regparm value for a function with the indicated TYPE and DECL.
4813 DECL may be NULL when calling function indirectly
4814 or considering a libcall. */
4817 ix86_function_regparm (const_tree type, const_tree decl)
4823 return (ix86_function_type_abi (type) == SYSV_ABI
4824 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4826 regparm = ix86_regparm;
4827 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4830 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4834 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4837 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4840 /* Use register calling convention for local functions when possible. */
4842 && TREE_CODE (decl) == FUNCTION_DECL
4846 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4847 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4850 int local_regparm, globals = 0, regno;
4852 /* Make sure no regparm register is taken by a
4853 fixed register variable. */
4854 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4855 if (fixed_regs[local_regparm])
4858 /* We don't want to use regparm(3) for nested functions as
4859 these use a static chain pointer in the third argument. */
4860 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4863 /* Each fixed register usage increases register pressure,
4864 so fewer registers should be used for argument passing.
4865 This functionality can be overridden by an explicit
4866 regparm value. */
4867 for (regno = 0; regno <= DI_REG; regno++)
4868 if (fixed_regs[regno])
4872 = globals < local_regparm ? local_regparm - globals : 0;
4874 if (local_regparm > regparm)
4875 regparm = local_regparm;
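/* Illustrative example (hypothetical function): the explicit attribute
   form of what the local-function heuristic above computes
   automatically; with regparm(3) the first three integer arguments
   arrive in %eax, %edx and %ecx:  */
#if 0
static int __attribute__ ((regparm (3)))
local_sum (int a, int b, int c)
{
  return a + b + c;
}
#endif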
4882 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4883 DFmode (2) arguments in SSE registers for a function with the
4884 indicated TYPE and DECL. DECL may be NULL when calling function
4885 indirectly or considering a libcall. Otherwise return 0. */
4888 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4890 gcc_assert (!TARGET_64BIT);
4892 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4893 by the sseregparm attribute. */
4894 if (TARGET_SSEREGPARM
4895 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4902 error ("Calling %qD with attribute sseregparm without "
4903 "SSE/SSE2 enabled", decl);
4905 error ("Calling %qT with attribute sseregparm without "
4906 "SSE/SSE2 enabled", type);
4914 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4915 (and DFmode for SSE2) arguments in SSE registers. */
4916 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4918 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4919 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4921 return TARGET_SSE2 ? 2 : 1;
4927 /* Return true if EAX is live at the start of the function. Used by
4928 ix86_expand_prologue to determine if we need special help before
4929 calling allocate_stack_worker. */
4932 ix86_eax_live_at_start_p (void)
4934 /* Cheat. Don't bother working forward from ix86_function_regparm
4935 to the function type to whether an actual argument is located in
4936 eax. Instead just look at cfg info, which is still close enough
4937 to correct at this point. This gives false positives for broken
4938 functions that might use uninitialized data that happens to be
4939 allocated in eax, but who cares? */
4940 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4943 /* Value is the number of bytes of arguments automatically
4944 popped when returning from a subroutine call.
4945 FUNDECL is the declaration node of the function (as a tree),
4946 FUNTYPE is the data type of the function (as a tree),
4947 or for a library call it is an identifier node for the subroutine name.
4948 SIZE is the number of bytes of arguments passed on the stack.
4950 On the 80386, the RTD insn may be used to pop them if the number
4951 of args is fixed, but if the number is variable then the caller
4952 must pop them all. RTD can't be used for library calls now
4953 because the library is compiled with the Unix compiler.
4954 Use of RTD is a selectable option, since it is incompatible with
4955 standard Unix calling sequences. If the option is not selected,
4956 the caller must always pop the args.
4958 The attribute stdcall is equivalent to RTD on a per module basis. */
4961 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4965 /* None of the 64-bit ABIs pop arguments. */
4969 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4971 /* Cdecl functions override -mrtd, and never pop the stack. */
4972 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4974 /* Stdcall and fastcall functions will pop the stack if not
4975 variable args. */
4976 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4977 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4978 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
4981 if (rtd && ! stdarg_p (funtype))
4985 /* Lose any fake structure return argument if it is passed on the stack. */
4986 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4987 && !KEEP_AGGREGATE_RETURN_POINTER)
4989 int nregs = ix86_function_regparm (funtype, fundecl);
4991 return GET_MODE_SIZE (Pmode);
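/* Illustrative example (hypothetical declarations): for the stdcall
   function the callee returns with "ret $8", so this hook reports 8;
   for the cdecl one it reports 0 and the caller pops the arguments:  */
#if 0
int __attribute__ ((stdcall)) f (int a, int b);  /* callee pops 8 bytes */
int __attribute__ ((cdecl)) g (int a, int b);    /* caller pops */
#endif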
4997 /* Argument support functions. */
4999 /* Return true when register may be used to pass function parameters. */
5001 ix86_function_arg_regno_p (int regno)
5004 const int *parm_regs;
5009 return (regno < REGPARM_MAX
5010 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5012 return (regno < REGPARM_MAX
5013 || (TARGET_MMX && MMX_REGNO_P (regno)
5014 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5015 || (TARGET_SSE && SSE_REGNO_P (regno)
5016 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5021 if (SSE_REGNO_P (regno) && TARGET_SSE)
5026 if (TARGET_SSE && SSE_REGNO_P (regno)
5027 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5031 /* TODO: The function should depend on the current function ABI, but
5032 builtins.c would need updating then.  Therefore we use the
5033 default ABI. */
5035 /* RAX is used as hidden argument to va_arg functions. */
5036 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5039 if (ix86_abi == MS_ABI)
5040 parm_regs = x86_64_ms_abi_int_parameter_registers;
5042 parm_regs = x86_64_int_parameter_registers;
5043 for (i = 0; i < (ix86_abi == MS_ABI
5044 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5045 if (regno == parm_regs[i])
5050 /* Return true if we do not know how to pass TYPE solely in registers. */
5053 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5055 if (must_pass_in_stack_var_size_or_pad (mode, type))
5058 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5059 The layout_type routine is crafty and tries to trick us into passing
5060 currently unsupported vector types on the stack by using TImode. */
5061 return (!TARGET_64BIT && mode == TImode
5062 && type && TREE_CODE (type) != VECTOR_TYPE);
5065 /* Return the size, in bytes, of the area reserved for arguments passed
5066 in registers for the function represented by FNDECL, which depends on
5067 the ABI used. */
5069 ix86_reg_parm_stack_space (const_tree fndecl)
5071 enum calling_abi call_abi = SYSV_ABI;
5072 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5073 call_abi = ix86_function_abi (fndecl);
5075 call_abi = ix86_function_type_abi (fndecl);
5076 if (call_abi == MS_ABI)
5081 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5082 call ABI used. */
5084 ix86_function_type_abi (const_tree fntype)
5086 if (TARGET_64BIT && fntype != NULL)
5088 enum calling_abi abi = ix86_abi;
5089 if (abi == SYSV_ABI)
5091 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5094 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5102 ix86_function_ms_hook_prologue (const_tree fntype)
5104 if (fntype && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
5106 if (decl_function_context (fntype) != NULL_TREE)
5108 error_at (DECL_SOURCE_LOCATION (fntype),
5109 "ms_hook_prologue is not compatible with nested function");
5117 static enum calling_abi
5118 ix86_function_abi (const_tree fndecl)
5122 return ix86_function_type_abi (TREE_TYPE (fndecl));
5125 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
5126 call ABI used. */
5128 ix86_cfun_abi (void)
5130 if (! cfun || ! TARGET_64BIT)
5132 return cfun->machine->call_abi;
5135 /* Write the extra assembler code needed to declare a function properly. */
5138 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5141 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5145 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5146 unsigned int filler_cc = 0xcccccccc;
5148 for (i = 0; i < filler_count; i += 4)
5149 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5152 ASM_OUTPUT_LABEL (asm_out_file, fname);
5154 /* Output magic byte marker, if hot-patch attribute is set.  For the x86
5155 case the frame-pointer prologue will be emitted in expand_prologue. */
5160 /* leaq [%rsp + 0], %rsp */
5161 asm_fprintf (asm_out_file, ASM_BYTE
5162 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5164 /* movl.s %edi, %edi. */
5165 asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
5170 extern void init_regs (void);
5172 /* Implementation of the call-ABI-switching target hook.  The call register
5173 sets specific to FNDECL are set up.  See also CONDITIONAL_REGISTER_USAGE
5174 for more details. */
5176 ix86_call_abi_override (const_tree fndecl)
5178 if (fndecl == NULL_TREE)
5179 cfun->machine->call_abi = ix86_abi;
5181 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5184 /* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
5185 expensive re-initialization via init_regs each time we switch function
5186 context, since this is needed only during RTL expansion. */
5188 ix86_maybe_switch_abi (void)
5191 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5195 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5196 for a call to a function whose data type is FNTYPE.
5197 For a library call, FNTYPE is 0. */
5200 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5201 tree fntype, /* tree ptr for function decl */
5202 rtx libname, /* SYMBOL_REF of library name or 0 */
5205 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5206 memset (cum, 0, sizeof (*cum));
5209 cum->call_abi = ix86_function_abi (fndecl);
5211 cum->call_abi = ix86_function_type_abi (fntype);
5212 /* Set up the number of registers to use for passing arguments. */
5214 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5215 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5216 "or subtarget optimization implying it");
5217 cum->nregs = ix86_regparm;
5220 cum->nregs = (cum->call_abi == SYSV_ABI
5221 ? X86_64_REGPARM_MAX
5222 : X86_64_MS_REGPARM_MAX);
5226 cum->sse_nregs = SSE_REGPARM_MAX;
5229 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5230 ? X86_64_SSE_REGPARM_MAX
5231 : X86_64_MS_SSE_REGPARM_MAX);
5235 cum->mmx_nregs = MMX_REGPARM_MAX;
5236 cum->warn_avx = true;
5237 cum->warn_sse = true;
5238 cum->warn_mmx = true;
5240 /* Because the type might mismatch between caller and callee, we need to
5241 use the actual type of the function for local calls.
5242 FIXME: cgraph_analyze can be told to actually record if function uses
5243 va_start so for local functions maybe_vaarg can be made more
5244 aggressive, helping K&R code.
5245 FIXME: once the typesystem is fixed, we won't need this code anymore. */
5247 fntype = TREE_TYPE (fndecl);
5248 cum->maybe_vaarg = (fntype
5249 ? (!prototype_p (fntype) || stdarg_p (fntype))
5254 /* If there are variable arguments, then we won't pass anything
5255 in registers in 32-bit mode. */
5256 if (stdarg_p (fntype))
5267 /* Use ecx and edx registers if function has fastcall attribute,
5268 else look for regparm information. */
5271 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5274 cum->fastcall = 1; /* Same first register as in fastcall. */
5276 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5282 cum->nregs = ix86_function_regparm (fntype, fndecl);
5285 /* Set up the number of SSE registers used for passing SFmode
5286 and DFmode arguments. Warn for mismatching ABI. */
5287 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5291 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5292 But in the case of vector types, it is some vector mode.
5294 When we have only some of our vector isa extensions enabled, then there
5295 are some modes for which vector_mode_supported_p is false. For these
5296 modes, the generic vector support in gcc will choose some non-vector mode
5297 in order to implement the type. By computing the natural mode, we'll
5298 select the proper ABI location for the operand and not depend on whatever
5299 the middle-end decides to do with these vector types.
5301 The middle-end can't deal with vector types > 16 bytes.  In this
5302 case, we return the original mode and warn about the ABI change if
5303 CUM isn't NULL. */
5305 static enum machine_mode
5306 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5308 enum machine_mode mode = TYPE_MODE (type);
5310 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5312 HOST_WIDE_INT size = int_size_in_bytes (type);
5313 if ((size == 8 || size == 16 || size == 32)
5314 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5315 && TYPE_VECTOR_SUBPARTS (type) > 1)
5317 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5319 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5320 mode = MIN_MODE_VECTOR_FLOAT;
5322 mode = MIN_MODE_VECTOR_INT;
5324 /* Get the mode which has this inner mode and number of units. */
5325 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5326 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5327 && GET_MODE_INNER (mode) == innermode)
5329 if (size == 32 && !TARGET_AVX)
5331 static bool warnedavx;
5338 warning (0, "AVX vector argument without AVX "
5339 "enabled changes the ABI");
5341 return TYPE_MODE (type);
5354 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5355 this may not agree with the mode that the type system has chosen for the
5356 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5357 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5360 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5365 if (orig_mode != BLKmode)
5366 tmp = gen_rtx_REG (orig_mode, regno);
5369 tmp = gen_rtx_REG (mode, regno);
5370 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5371 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5377 /* x86-64 register passing implementation.  See the x86-64 ABI for
5378 details.  The goal of this code is to classify each 8-byte chunk of the
5379 incoming argument by register class and assign registers accordingly. */
5381 /* Return the union class of CLASS1 and CLASS2.
5382 See the x86-64 PS ABI for details. */
5384 static enum x86_64_reg_class
5385 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5387 /* Rule #1: If both classes are equal, this is the resulting class. */
5388 if (class1 == class2)
5391 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5392 the other class. */
5393 if (class1 == X86_64_NO_CLASS)
5395 if (class2 == X86_64_NO_CLASS)
5398 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5399 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5400 return X86_64_MEMORY_CLASS;
5402 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5403 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5404 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5405 return X86_64_INTEGERSI_CLASS;
5406 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5407 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5408 return X86_64_INTEGER_CLASS;
5410 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5411 MEMORY is used. */
5412 if (class1 == X86_64_X87_CLASS
5413 || class1 == X86_64_X87UP_CLASS
5414 || class1 == X86_64_COMPLEX_X87_CLASS
5415 || class2 == X86_64_X87_CLASS
5416 || class2 == X86_64_X87UP_CLASS
5417 || class2 == X86_64_COMPLEX_X87_CLASS)
5418 return X86_64_MEMORY_CLASS;
5420 /* Rule #6: Otherwise class SSE is used. */
5421 return X86_64_SSE_CLASS;
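/* Worked example of the rules above (illustrative): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS hits rule #4 and
   yields X86_64_INTEGERSI_CLASS, while merging X86_64_X87_CLASS with
   X86_64_SSE_CLASS hits rule #5 and yields X86_64_MEMORY_CLASS.  */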
5424 /* Classify the argument of type TYPE and mode MODE.
5425 CLASSES will be filled by the register class used to pass each word
5426 of the operand. The number of words is returned. In case the parameter
5427 should be passed in memory, 0 is returned. As a special case for zero
5428 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5430 BIT_OFFSET is used internally for handling records and specifies the
5431 offset in bits modulo 256 to avoid overflow cases.
5433 See the x86-64 PS ABI for details.
5437 classify_argument (enum machine_mode mode, const_tree type,
5438 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5440 HOST_WIDE_INT bytes =
5441 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5442 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5444 /* Variable sized entities are always passed/returned in memory. */
5448 if (mode != VOIDmode
5449 && targetm.calls.must_pass_in_stack (mode, type))
5452 if (type && AGGREGATE_TYPE_P (type))
5456 enum x86_64_reg_class subclasses[MAX_CLASSES];
5458 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5462 for (i = 0; i < words; i++)
5463 classes[i] = X86_64_NO_CLASS;
5465 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
5466 signal the memory class, so handle this as a special case. */
5469 classes[0] = X86_64_NO_CLASS;
5473 /* Classify each field of record and merge classes. */
5474 switch (TREE_CODE (type))
5477 /* And now merge the fields of the structure. */
5478 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5480 if (TREE_CODE (field) == FIELD_DECL)
5484 if (TREE_TYPE (field) == error_mark_node)
5487 /* Bitfields are always classified as integer. Handle them
5488 early, since later code would consider them to be
5489 misaligned integers. */
5490 if (DECL_BIT_FIELD (field))
5492 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5493 i < ((int_bit_position (field) + (bit_offset % 64))
5494 + tree_low_cst (DECL_SIZE (field), 0)
5497 merge_classes (X86_64_INTEGER_CLASS,
5504 type = TREE_TYPE (field);
5506 /* Flexible array member is ignored. */
5507 if (TYPE_MODE (type) == BLKmode
5508 && TREE_CODE (type) == ARRAY_TYPE
5509 && TYPE_SIZE (type) == NULL_TREE
5510 && TYPE_DOMAIN (type) != NULL_TREE
5511 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5516 if (!warned && warn_psabi)
5519 inform (input_location,
5520 "The ABI of passing struct with"
5521 " a flexible array member has"
5522 " changed in GCC 4.4");
5526 num = classify_argument (TYPE_MODE (type), type,
5528 (int_bit_position (field)
5529 + bit_offset) % 256);
5532 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5533 for (i = 0; i < num && (i + pos) < words; i++)
5535 merge_classes (subclasses[i], classes[i + pos]);
5542 /* Arrays are handled as small records. */
5545 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5546 TREE_TYPE (type), subclasses, bit_offset);
5550 /* The partial classes are now full classes. */
5551 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5552 subclasses[0] = X86_64_SSE_CLASS;
5553 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5554 && !((bit_offset % 64) == 0 && bytes == 4))
5555 subclasses[0] = X86_64_INTEGER_CLASS;
5557 for (i = 0; i < words; i++)
5558 classes[i] = subclasses[i % num];
5563 case QUAL_UNION_TYPE:
5564 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5566 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5568 if (TREE_CODE (field) == FIELD_DECL)
5572 if (TREE_TYPE (field) == error_mark_node)
5575 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5576 TREE_TYPE (field), subclasses,
5580 for (i = 0; i < num; i++)
5581 classes[i] = merge_classes (subclasses[i], classes[i]);
5592 /* When size > 16 bytes, if the first one isn't
5593 X86_64_SSE_CLASS or any other one isn't
5594 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5596 if (classes[0] != X86_64_SSE_CLASS)
5599 for (i = 1; i < words; i++)
5600 if (classes[i] != X86_64_SSEUP_CLASS)
5604 /* Final merger cleanup. */
5605 for (i = 0; i < words; i++)
5607 /* If one class is MEMORY, everything should be passed in memory. */
5609 if (classes[i] == X86_64_MEMORY_CLASS)
5612 /* X86_64_SSEUP_CLASS should always be preceded by
5613 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5614 if (classes[i] == X86_64_SSEUP_CLASS
5615 && classes[i - 1] != X86_64_SSE_CLASS
5616 && classes[i - 1] != X86_64_SSEUP_CLASS)
5618 /* The first one should never be X86_64_SSEUP_CLASS. */
5619 gcc_assert (i != 0);
5620 classes[i] = X86_64_SSE_CLASS;
5623 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5624 everything should be passed in memory. */
5625 if (classes[i] == X86_64_X87UP_CLASS
5626 && (classes[i - 1] != X86_64_X87_CLASS))
5630 /* The first one should never be X86_64_X87UP_CLASS. */
5631 gcc_assert (i != 0);
5632 if (!warned && warn_psabi)
5635 inform (input_location,
5636 "The ABI of passing union with long double"
5637 " has changed in GCC 4.4");
5645 /* Compute the alignment needed. We align all types to their natural
5646 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5647 if (mode != VOIDmode && mode != BLKmode)
5649 int mode_alignment = GET_MODE_BITSIZE (mode);
5652 mode_alignment = 128;
5653 else if (mode == XCmode)
5654 mode_alignment = 256;
5655 if (COMPLEX_MODE_P (mode))
5656 mode_alignment /= 2;
5657 /* Misaligned fields are always returned in memory. */
5658 if (bit_offset % mode_alignment)
5662 /* For V1xx modes, just use the base mode. */
5663 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5664 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5665 mode = GET_MODE_INNER (mode);
5667 /* Classification of atomic types. */
5672 classes[0] = X86_64_SSE_CLASS;
5675 classes[0] = X86_64_SSE_CLASS;
5676 classes[1] = X86_64_SSEUP_CLASS;
5686 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5690 classes[0] = X86_64_INTEGERSI_CLASS;
5693 else if (size <= 64)
5695 classes[0] = X86_64_INTEGER_CLASS;
5698 else if (size <= 64+32)
5700 classes[0] = X86_64_INTEGER_CLASS;
5701 classes[1] = X86_64_INTEGERSI_CLASS;
5704 else if (size <= 64+64)
5706 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5714 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5718 /* OImode shouldn't be used directly. */
5723 if (!(bit_offset % 64))
5724 classes[0] = X86_64_SSESF_CLASS;
5726 classes[0] = X86_64_SSE_CLASS;
5729 classes[0] = X86_64_SSEDF_CLASS;
5732 classes[0] = X86_64_X87_CLASS;
5733 classes[1] = X86_64_X87UP_CLASS;
5736 classes[0] = X86_64_SSE_CLASS;
5737 classes[1] = X86_64_SSEUP_CLASS;
5740 classes[0] = X86_64_SSE_CLASS;
5741 if (!(bit_offset % 64))
5747 if (!warned && warn_psabi)
5750 inform (input_location,
5751 "The ABI of passing structure with complex float"
5752 " member has changed in GCC 4.4");
5754 classes[1] = X86_64_SSESF_CLASS;
5758 classes[0] = X86_64_SSEDF_CLASS;
5759 classes[1] = X86_64_SSEDF_CLASS;
5762 classes[0] = X86_64_COMPLEX_X87_CLASS;
5765 /* These modes are larger than 16 bytes. */
5773 classes[0] = X86_64_SSE_CLASS;
5774 classes[1] = X86_64_SSEUP_CLASS;
5775 classes[2] = X86_64_SSEUP_CLASS;
5776 classes[3] = X86_64_SSEUP_CLASS;
5784 classes[0] = X86_64_SSE_CLASS;
5785 classes[1] = X86_64_SSEUP_CLASS;
5793 classes[0] = X86_64_SSE_CLASS;
5799 gcc_assert (VECTOR_MODE_P (mode));
5804 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5806 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5807 classes[0] = X86_64_INTEGERSI_CLASS;
5809 classes[0] = X86_64_INTEGER_CLASS;
5810 classes[1] = X86_64_INTEGER_CLASS;
5811 return 1 + (bytes > 8);
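/* Worked example (illustrative only): for

       struct p { double x; double y; };

   classify_argument returns 2 with classes[0] == classes[1] ==
   X86_64_SSEDF_CLASS, so the struct travels in two SSE registers.  A
   24-byte all-integer struct instead fails the size > 16 SSE/SSEUP
   requirement checked above and is passed in memory (0 is returned).  */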
5815 /* Examine the argument and return the number of registers required in each
5816 class. Return 0 iff the parameter should be passed in memory. */
5818 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5819 int *int_nregs, int *sse_nregs)
5821 enum x86_64_reg_class regclass[MAX_CLASSES];
5822 int n = classify_argument (mode, type, regclass, 0);
5828 for (n--; n >= 0; n--)
5829 switch (regclass[n])
5831 case X86_64_INTEGER_CLASS:
5832 case X86_64_INTEGERSI_CLASS:
5835 case X86_64_SSE_CLASS:
5836 case X86_64_SSESF_CLASS:
5837 case X86_64_SSEDF_CLASS:
5840 case X86_64_NO_CLASS:
5841 case X86_64_SSEUP_CLASS:
5843 case X86_64_X87_CLASS:
5844 case X86_64_X87UP_CLASS:
5848 case X86_64_COMPLEX_X87_CLASS:
5849 return in_return ? 2 : 0;
5850 case X86_64_MEMORY_CLASS:
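/* For instance (illustrative): "double _Complex" classifies as two
   X86_64_SSEDF_CLASS words, so examine_argument sets *int_nregs = 0
   and *sse_nregs = 2; a "long double" argument yields 0 (memory),
   since the X87 classes are only permitted in return values.  */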
5856 /* Construct the container for the argument used by the GCC interface.
5857 See FUNCTION_ARG for the detailed description. */
5860 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5861 const_tree type, int in_return, int nintregs, int nsseregs,
5862 const int *intreg, int sse_regno)
5864 /* The following variables hold the static issued_error state. */
5865 static bool issued_sse_arg_error;
5866 static bool issued_sse_ret_error;
5867 static bool issued_x87_ret_error;
5869 enum machine_mode tmpmode;
5871 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5872 enum x86_64_reg_class regclass[MAX_CLASSES];
5876 int needed_sseregs, needed_intregs;
5877 rtx exp[MAX_CLASSES];
5880 n = classify_argument (mode, type, regclass, 0);
5883 if (!examine_argument (mode, type, in_return, &needed_intregs,
5886 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5889 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5890 some less clueful developer tries to use floating-point anyway. */
5891 if (needed_sseregs && !TARGET_SSE)
5895 if (!issued_sse_ret_error)
5897 error ("SSE register return with SSE disabled");
5898 issued_sse_ret_error = true;
5901 else if (!issued_sse_arg_error)
5903 error ("SSE register argument with SSE disabled");
5904 issued_sse_arg_error = true;
5909 /* Likewise, error if the ABI requires us to return values in the
5910 x87 registers and the user specified -mno-80387. */
5911 if (!TARGET_80387 && in_return)
5912 for (i = 0; i < n; i++)
5913 if (regclass[i] == X86_64_X87_CLASS
5914 || regclass[i] == X86_64_X87UP_CLASS
5915 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5917 if (!issued_x87_ret_error)
5919 error ("x87 register return with x87 disabled");
5920 issued_x87_ret_error = true;
5925 /* First construct simple cases. Avoid SCmode, since we want to use a
5926 single register to pass this type. */
5927 if (n == 1 && mode != SCmode)
5928 switch (regclass[0])
5930 case X86_64_INTEGER_CLASS:
5931 case X86_64_INTEGERSI_CLASS:
5932 return gen_rtx_REG (mode, intreg[0]);
5933 case X86_64_SSE_CLASS:
5934 case X86_64_SSESF_CLASS:
5935 case X86_64_SSEDF_CLASS:
5936 if (mode != BLKmode)
5937 return gen_reg_or_parallel (mode, orig_mode,
5938 SSE_REGNO (sse_regno));
5940 case X86_64_X87_CLASS:
5941 case X86_64_COMPLEX_X87_CLASS:
5942 return gen_rtx_REG (mode, FIRST_STACK_REG);
5943 case X86_64_NO_CLASS:
5944 /* Zero sized array, struct or class. */
5949 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5950 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5951 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5953 && regclass[0] == X86_64_SSE_CLASS
5954 && regclass[1] == X86_64_SSEUP_CLASS
5955 && regclass[2] == X86_64_SSEUP_CLASS
5956 && regclass[3] == X86_64_SSEUP_CLASS
5958 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5961 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5962 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5963 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5964 && regclass[1] == X86_64_INTEGER_CLASS
5965 && (mode == CDImode || mode == TImode || mode == TFmode)
5966 && intreg[0] + 1 == intreg[1])
5967 return gen_rtx_REG (mode, intreg[0]);
5969 /* Otherwise figure out the entries of the PARALLEL. */
5970 for (i = 0; i < n; i++)
5974 switch (regclass[i])
5976 case X86_64_NO_CLASS:
5978 case X86_64_INTEGER_CLASS:
5979 case X86_64_INTEGERSI_CLASS:
5980 /* Merge TImodes on aligned occasions here too. */
5981 if (i * 8 + 8 > bytes)
5982 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5983 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5987 /* We've requested 24 bytes for which we have no mode. Use DImode. */
5988 if (tmpmode == BLKmode)
5990 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5991 gen_rtx_REG (tmpmode, *intreg),
5995 case X86_64_SSESF_CLASS:
5996 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5997 gen_rtx_REG (SFmode,
5998 SSE_REGNO (sse_regno)),
6002 case X86_64_SSEDF_CLASS:
6003 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6004 gen_rtx_REG (DFmode,
6005 SSE_REGNO (sse_regno)),
6009 case X86_64_SSE_CLASS:
6017 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6027 && regclass[1] == X86_64_SSEUP_CLASS
6028 && regclass[2] == X86_64_SSEUP_CLASS
6029 && regclass[3] == X86_64_SSEUP_CLASS);
6036 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6037 gen_rtx_REG (tmpmode,
6038 SSE_REGNO (sse_regno)),
6047 /* Empty aligned struct, union or class. */
6051 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6052 for (i = 0; i < nexps; i++)
6053 XVECEXP (ret, 0, i) = exp [i];
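/* Example (illustrative; the exact registers depend on INTREG and
   SSE_REGNO): for "struct { long l; double d; }" the loop above builds
   a two-element PARALLEL along the lines of

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the integer half in a GPR at byte offset 0 and the double in an
   SSE register at byte offset 8.  */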
6057 /* Update the data in CUM to advance over an argument of mode MODE
6058 and data type TYPE. (TYPE is null for libcalls where that information
6059 may not be available.) */
6062 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6063 const_tree type, HOST_WIDE_INT bytes,
6064 HOST_WIDE_INT words)
6080 cum->words += words;
6081 cum->nregs -= words;
6082 cum->regno += words;
6084 if (cum->nregs <= 0)
6092 /* OImode shouldn't be used directly. */
6096 if (cum->float_in_sse < 2)
6099 if (cum->float_in_sse < 1)
6116 if (!type || !AGGREGATE_TYPE_P (type))
6118 cum->sse_words += words;
6119 cum->sse_nregs -= 1;
6120 cum->sse_regno += 1;
6121 if (cum->sse_nregs <= 0)
6135 if (!type || !AGGREGATE_TYPE_P (type))
6137 cum->mmx_words += words;
6138 cum->mmx_nregs -= 1;
6139 cum->mmx_regno += 1;
6140 if (cum->mmx_nregs <= 0)
6151 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6152 const_tree type, HOST_WIDE_INT words, bool named)
6154 int int_nregs, sse_nregs;
6156 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6157 if (!named && VALID_AVX256_REG_MODE (mode))
6160 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6161 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6163 cum->nregs -= int_nregs;
6164 cum->sse_nregs -= sse_nregs;
6165 cum->regno += int_nregs;
6166 cum->sse_regno += sse_nregs;
6170 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6171 cum->words = (cum->words + align - 1) & ~(align - 1);
6172 cum->words += words;
6177 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6178 HOST_WIDE_INT words)
6180 /* Otherwise, this should be passed indirectly. */
6181 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6183 cum->words += words;
6191 /* Update the data in CUM to advance over an argument of mode MODE and
6192 data type TYPE. (TYPE is null for libcalls where that information
6193 may not be available.) */
6196 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6197 const_tree type, bool named)
6199 HOST_WIDE_INT bytes, words;
6201 if (mode == BLKmode)
6202 bytes = int_size_in_bytes (type);
6204 bytes = GET_MODE_SIZE (mode);
6205 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6208 mode = type_natural_mode (type, NULL);
6210 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6211 function_arg_advance_ms_64 (cum, bytes, words);
6212 else if (TARGET_64BIT)
6213 function_arg_advance_64 (cum, mode, type, words, named);
6215 function_arg_advance_32 (cum, mode, type, bytes, words);
6218 /* Define where to put the arguments to a function.
6219 Value is zero to push the argument on the stack,
6220 or a hard register in which to store the argument.
6222 MODE is the argument's machine mode.
6223 TYPE is the data type of the argument (as a tree). This is null
6224 for libcalls where that information may not be available.
6226 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6227 the preceding args and about the function being called.
6228 NAMED is nonzero if this argument is a named parameter
6229 (otherwise it is an extra parameter matching an ellipsis). */
6232 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6233 enum machine_mode orig_mode, const_tree type,
6234 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6236 static bool warnedsse, warnedmmx;
6238 /* Avoid the AL settings for the Unix64 ABI. */
6239 if (mode == VOIDmode)
6255 if (words <= cum->nregs)
6257 int regno = cum->regno;
6259 /* Fastcall allocates the first two DWORD (SImode) or smaller
6260 arguments to ECX and EDX if it isn't an aggregate type. */
6266 || (type && AGGREGATE_TYPE_P (type)))
6269 /* ECX, not EAX, is the first allocated register. */
6270 if (regno == AX_REG)
6273 return gen_rtx_REG (mode, regno);
6278 if (cum->float_in_sse < 2)
6281 if (cum->float_in_sse < 1)
6285 /* In 32-bit mode, we pass TImode in XMM registers. */
6292 if (!type || !AGGREGATE_TYPE_P (type))
6294 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6297 warning (0, "SSE vector argument without SSE enabled "
6301 return gen_reg_or_parallel (mode, orig_mode,
6302 cum->sse_regno + FIRST_SSE_REG);
6307 /* OImode shouldn't be used directly. */
6316 if (!type || !AGGREGATE_TYPE_P (type))
6319 return gen_reg_or_parallel (mode, orig_mode,
6320 cum->sse_regno + FIRST_SSE_REG);
6330 if (!type || !AGGREGATE_TYPE_P (type))
6332 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6335 warning (0, "MMX vector argument without MMX enabled "
6339 return gen_reg_or_parallel (mode, orig_mode,
6340 cum->mmx_regno + FIRST_MMX_REG);
6349 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6350 enum machine_mode orig_mode, const_tree type, bool named)
6352 /* Handle a hidden AL argument containing the number of SSE registers
6353 used by varargs x86-64 functions. */
6354 if (mode == VOIDmode)
6355 return GEN_INT (cum->maybe_vaarg
6356 ? (cum->sse_nregs < 0
6357 ? X86_64_SSE_REGPARM_MAX
6372 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6378 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6380 &x86_64_int_parameter_registers [cum->regno],
6385 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6386 enum machine_mode orig_mode, bool named,
6387 HOST_WIDE_INT bytes)
6391 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6392 We use a value of -2 to specify that the current function call is MSABI. */
6393 if (mode == VOIDmode)
6394 return GEN_INT (-2);
6396 /* If we've run out of registers, it goes on the stack. */
6397 if (cum->nregs == 0)
6400 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6402 /* Only floating point modes are passed in anything but integer regs. */
6403 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6406 regno = cum->regno + FIRST_SSE_REG;
6411 /* Unnamed floating parameters are passed in both the
6412 SSE and integer registers. */
6413 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6414 t2 = gen_rtx_REG (mode, regno);
6415 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6416 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6417 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6420 /* Handle aggregate types passed in registers. */
6421 if (orig_mode == BLKmode)
6423 if (bytes > 0 && bytes <= 8)
6424 mode = (bytes > 4 ? DImode : SImode);
6425 if (mode == BLKmode)
6429 return gen_reg_or_parallel (mode, orig_mode, regno);
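/* Sketch of the MS ABI behavior implemented above (illustrative): the
   first four arguments are assigned to rcx, rdx, r8 and r9, or to
   xmm0-xmm3 for SFmode/DFmode; an unnamed float is described by the
   two-register PARALLEL so it is available in both the SSE and the
   integer register; BLKmode aggregates of 1-8 bytes are retyped to
   SImode/DImode.  */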
6432 /* Return where to put the arguments to a function.
6433 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6435 MODE is the argument's machine mode. TYPE is the data type of the
6436 argument. It is null for libcalls where that information may not be
6437 available. CUM gives information about the preceding args and about
6438 the function being called. NAMED is nonzero if this argument is a
6439 named parameter (otherwise it is an extra parameter matching an
ellipsis). */
6443 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6444 const_tree type, bool named)
6446 enum machine_mode mode = omode;
6447 HOST_WIDE_INT bytes, words;
6449 if (mode == BLKmode)
6450 bytes = int_size_in_bytes (type);
6452 bytes = GET_MODE_SIZE (mode);
6453 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6455 /* To simplify the code below, represent vector types with a vector mode
6456 even if MMX/SSE are not active. */
6457 if (type && TREE_CODE (type) == VECTOR_TYPE)
6458 mode = type_natural_mode (type, cum);
6460 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6461 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6462 else if (TARGET_64BIT)
6463 return function_arg_64 (cum, mode, omode, type, named);
6465 return function_arg_32 (cum, mode, omode, type, bytes, words);
6468 /* A C expression that indicates when an argument must be passed by
6469 reference. If nonzero for an argument, a copy of that argument is
6470 made in memory and a pointer to the argument is passed instead of
6471 the argument itself. The pointer is passed in whatever way is
6472 appropriate for passing a pointer to that type. */
6475 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6476 enum machine_mode mode ATTRIBUTE_UNUSED,
6477 const_tree type, bool named ATTRIBUTE_UNUSED)
6479 /* See Windows x64 Software Convention. */
6480 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6482 int msize = (int) GET_MODE_SIZE (mode);
6485 /* Arrays are passed by reference. */
6486 if (TREE_CODE (type) == ARRAY_TYPE)
6489 if (AGGREGATE_TYPE_P (type))
6491 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6492 are passed by reference. */
6493 msize = int_size_in_bytes (type);
6497 /* __m128 is passed by reference. */
6499 case 1: case 2: case 4: case 8:
6505 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
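/* Concretely (illustrative): under the MS ABI an __m128 argument or a
   12-byte struct is passed by reference, while 1-, 2-, 4- and 8-byte
   values are passed directly; on 64-bit SysV only variable-sized types
   (int_size_in_bytes == -1) are forced by reference here.  */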
6511 /* Return true when TYPE should be 128-bit aligned for 32-bit argument
passing. */
6514 contains_aligned_value_p (const_tree type)
6516 enum machine_mode mode = TYPE_MODE (type);
6517 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6521 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6523 if (TYPE_ALIGN (type) < 128)
6526 if (AGGREGATE_TYPE_P (type))
6528 /* Walk the aggregates recursively. */
6529 switch (TREE_CODE (type))
6533 case QUAL_UNION_TYPE:
6537 /* Walk all the structure fields. */
6538 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6540 if (TREE_CODE (field) == FIELD_DECL
6541 && contains_aligned_value_p (TREE_TYPE (field)))
6548 /* Just in case some language passes arrays by value. */
6549 if (contains_aligned_value_p (TREE_TYPE (type)))
6560 /* Gives the alignment boundary, in bits, of an argument with the
6561 specified mode and type. */
6564 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6569 /* Since the main variant type is used for the call, convert the
6570 type to its main variant. */
6571 type = TYPE_MAIN_VARIANT (type);
6572 align = TYPE_ALIGN (type);
6575 align = GET_MODE_ALIGNMENT (mode);
6576 if (align < PARM_BOUNDARY)
6577 align = PARM_BOUNDARY;
6578 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6579 natural boundaries. */
6580 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6582 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
6583 make an exception for SSE modes since these require 128-bit alignment.
6586 The handling here differs from field_alignment. ICC aligns MMX
6587 arguments to 4 byte boundaries, while structure fields are aligned
6588 to 8 byte boundaries. */
6591 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6592 align = PARM_BOUNDARY;
6596 if (!contains_aligned_value_p (type))
6597 align = PARM_BOUNDARY;
6600 if (align > BIGGEST_ALIGNMENT)
6601 align = BIGGEST_ALIGNMENT;
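/* Example (illustrative): with SSE enabled on 32-bit, an __m128
   argument keeps its 128-bit mode alignment, while an aggregate with
   no 128-bit-aligned member falls back to PARM_BOUNDARY (32 bits).  */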
6605 /* Return true if N is a possible register number of function value. */
6608 ix86_function_value_regno_p (const unsigned int regno)
6615 case FIRST_FLOAT_REG:
6616 /* TODO: The function should depend on the current function ABI, but
6617 builtins.c would need updating then. Therefore we use the default ABI. */
6619 if (TARGET_64BIT && ix86_abi == MS_ABI)
6621 return TARGET_FLOAT_RETURNS_IN_80387;
6627 if (TARGET_MACHO || TARGET_64BIT)
6635 /* Define how to find the value returned by a function.
6636 VALTYPE is the data type of the value (as a tree).
6637 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6638 otherwise, FUNC is 0. */
6641 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6642 const_tree fntype, const_tree fn)
6646 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6647 we normally prevent this case when mmx is not available. However
6648 some ABIs may require the result to be returned like DImode. */
6649 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6650 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6652 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6653 we prevent this case when sse is not available. However some ABIs
6654 may require the result to be returned like integer TImode. */
6655 else if (mode == TImode
6656 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6657 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6659 /* 32-byte vector modes in %ymm0. */
6660 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6661 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6663 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6664 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6665 regno = FIRST_FLOAT_REG;
6667 /* Most things go in %eax. */
6670 /* Override FP return register with %xmm0 for local functions when
6671 SSE math is enabled or for functions with sseregparm attribute. */
6672 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6674 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6675 if ((sse_level >= 1 && mode == SFmode)
6676 || (sse_level == 2 && mode == DFmode))
6677 regno = FIRST_SSE_REG;
6680 /* OImode shouldn't be used directly. */
6681 gcc_assert (mode != OImode);
6683 return gen_rtx_REG (orig_mode, regno);
6687 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6692 /* Handle libcalls, which don't provide a type node. */
6693 if (valtype == NULL)
6705 return gen_rtx_REG (mode, FIRST_SSE_REG);
6708 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6712 return gen_rtx_REG (mode, AX_REG);
6716 ret = construct_container (mode, orig_mode, valtype, 1,
6717 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6718 x86_64_int_return_registers, 0);
6720 /* For zero sized structures, construct_container returns NULL, but we
6721 need to keep the rest of the compiler happy by returning a meaningful value. */
6723 ret = gen_rtx_REG (orig_mode, AX_REG);
6729 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6731 unsigned int regno = AX_REG;
6735 switch (GET_MODE_SIZE (mode))
6738 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6739 && !COMPLEX_MODE_P (mode))
6740 regno = FIRST_SSE_REG;
6744 if (mode == SFmode || mode == DFmode)
6745 regno = FIRST_SSE_REG;
6751 return gen_rtx_REG (orig_mode, regno);
6755 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6756 enum machine_mode orig_mode, enum machine_mode mode)
6758 const_tree fn, fntype;
6761 if (fntype_or_decl && DECL_P (fntype_or_decl))
6762 fn = fntype_or_decl;
6763 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6765 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6766 return function_value_ms_64 (orig_mode, mode);
6767 else if (TARGET_64BIT)
6768 return function_value_64 (orig_mode, mode, valtype);
6770 return function_value_32 (orig_mode, mode, fntype, fn);
6774 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6775 bool outgoing ATTRIBUTE_UNUSED)
6777 enum machine_mode mode, orig_mode;
6779 orig_mode = TYPE_MODE (valtype);
6780 mode = type_natural_mode (valtype, NULL);
6781 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6785 ix86_libcall_value (enum machine_mode mode)
6787 return ix86_function_value_1 (NULL, NULL, mode, mode);
6790 /* Return true iff type is returned in memory. */
6792 static int ATTRIBUTE_UNUSED
6793 return_in_memory_32 (const_tree type, enum machine_mode mode)
6797 if (mode == BLKmode)
6800 size = int_size_in_bytes (type);
6802 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6805 if (VECTOR_MODE_P (mode) || mode == TImode)
6807 /* User-created vectors small enough to fit in EAX. */
6811 /* MMX/3dNow values are returned in MM0,
6812 except when it doesn't exist. */
6814 return (TARGET_MMX ? 0 : 1);
6816 /* SSE values are returned in XMM0, except when it doesn't exist. */
6818 return (TARGET_SSE ? 0 : 1);
6820 /* AVX values are returned in YMM0, except when it doesn't exist. */
6822 return TARGET_AVX ? 0 : 1;
6831 /* OImode shouldn't be used directly. */
6832 gcc_assert (mode != OImode);
6837 static int ATTRIBUTE_UNUSED
6838 return_in_memory_64 (const_tree type, enum machine_mode mode)
6840 int needed_intregs, needed_sseregs;
6841 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6844 static int ATTRIBUTE_UNUSED
6845 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6847 HOST_WIDE_INT size = int_size_in_bytes (type);
6849 /* __m128 is returned in xmm0. */
6850 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6851 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6854 /* Otherwise, the size must be exactly in [1248]. */
6855 return (size != 1 && size != 2 && size != 4 && size != 8);
6859 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6861 #ifdef SUBTARGET_RETURN_IN_MEMORY
6862 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6864 const enum machine_mode mode = type_natural_mode (type, NULL);
6868 if (ix86_function_type_abi (fntype) == MS_ABI)
6869 return return_in_memory_ms_64 (type, mode);
6871 return return_in_memory_64 (type, mode);
6874 return return_in_memory_32 (type, mode);
6878 /* Return false iff TYPE is returned in memory. This version is used
6879 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6880 but differs notably in that when MMX is available, 8-byte vectors
6881 are returned in memory, rather than in MMX registers. */
6884 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6887 enum machine_mode mode = type_natural_mode (type, NULL);
6890 return return_in_memory_64 (type, mode);
6892 if (mode == BLKmode)
6895 size = int_size_in_bytes (type);
6897 if (VECTOR_MODE_P (mode))
6899 /* Return in memory only if MMX registers *are* available. This
6900 seems backwards, but it is consistent with the existing Solaris x86 ABI. */
6907 else if (mode == TImode)
6909 else if (mode == XFmode)
6915 /* When returning SSE vector types, we have a choice of either
6916 (1) being abi incompatible with a -march switch, or
6917 (2) generating an error.
6918 Given no good solution, I think the safest thing is one warning.
6919 The user won't be able to use -Werror, but....
6921 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6922 called in response to actually generating a caller or callee that
6923 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6924 via aggregate_value_p for general type probing from tree-ssa. */
6927 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6929 static bool warnedsse, warnedmmx;
6931 if (!TARGET_64BIT && type)
6933 /* Look at the return type of the function, not the function type. */
6934 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6936 if (!TARGET_SSE && !warnedsse)
6939 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6942 warning (0, "SSE vector return without SSE enabled "
6947 if (!TARGET_MMX && !warnedmmx)
6949 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6952 warning (0, "MMX vector return without MMX enabled "
6962 /* Create the va_list data type. */
6964 /* Returns the calling convention specific va_list data type.
6965 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6968 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6970 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6972 /* For i386 we use a plain pointer to the argument area. */
6973 if (!TARGET_64BIT || abi == MS_ABI)
6974 return build_pointer_type (char_type_node);
6976 record = lang_hooks.types.make_type (RECORD_TYPE);
6977 type_decl = build_decl (BUILTINS_LOCATION,
6978 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6980 f_gpr = build_decl (BUILTINS_LOCATION,
6981 FIELD_DECL, get_identifier ("gp_offset"),
6982 unsigned_type_node);
6983 f_fpr = build_decl (BUILTINS_LOCATION,
6984 FIELD_DECL, get_identifier ("fp_offset"),
6985 unsigned_type_node);
6986 f_ovf = build_decl (BUILTINS_LOCATION,
6987 FIELD_DECL, get_identifier ("overflow_arg_area"),
6989 f_sav = build_decl (BUILTINS_LOCATION,
6990 FIELD_DECL, get_identifier ("reg_save_area"),
6993 va_list_gpr_counter_field = f_gpr;
6994 va_list_fpr_counter_field = f_fpr;
6996 DECL_FIELD_CONTEXT (f_gpr) = record;
6997 DECL_FIELD_CONTEXT (f_fpr) = record;
6998 DECL_FIELD_CONTEXT (f_ovf) = record;
6999 DECL_FIELD_CONTEXT (f_sav) = record;
7001 TREE_CHAIN (record) = type_decl;
7002 TYPE_NAME (record) = type_decl;
7003 TYPE_FIELDS (record) = f_gpr;
7004 DECL_CHAIN (f_gpr) = f_fpr;
7005 DECL_CHAIN (f_fpr) = f_ovf;
7006 DECL_CHAIN (f_ovf) = f_sav;
7008 layout_type (record);
7010 /* The correct type is an array type of one element. */
7011 return build_array_type (record, build_index_type (size_zero_node));
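/* The record built above corresponds to the SysV x86-64 va_list as it
   appears at the C level (per the psABI):

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];
*/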
7014 /* Set up the builtin va_list data type and, for 64-bit, the additional
7015 calling convention specific va_list data types. */
7018 ix86_build_builtin_va_list (void)
7020 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7022 /* Initialize abi specific va_list builtin types. */
7026 if (ix86_abi == MS_ABI)
7028 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7029 if (TREE_CODE (t) != RECORD_TYPE)
7030 t = build_variant_type_copy (t);
7031 sysv_va_list_type_node = t;
7036 if (TREE_CODE (t) != RECORD_TYPE)
7037 t = build_variant_type_copy (t);
7038 sysv_va_list_type_node = t;
7040 if (ix86_abi != MS_ABI)
7042 t = ix86_build_builtin_va_list_abi (MS_ABI);
7043 if (TREE_CODE (t) != RECORD_TYPE)
7044 t = build_variant_type_copy (t);
7045 ms_va_list_type_node = t;
7050 if (TREE_CODE (t) != RECORD_TYPE)
7051 t = build_variant_type_copy (t);
7052 ms_va_list_type_node = t;
7059 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7062 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7068 /* GPR size of varargs save area. */
7069 if (cfun->va_list_gpr_size)
7070 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7072 ix86_varargs_gpr_size = 0;
7074 /* FPR size of varargs save area. We don't need it if we don't pass
7075 anything in SSE registers. */
7076 if (TARGET_SSE && cfun->va_list_fpr_size)
7077 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7079 ix86_varargs_fpr_size = 0;
7081 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7084 save_area = frame_pointer_rtx;
7085 set = get_varargs_alias_set ();
7087 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7088 if (max > X86_64_REGPARM_MAX)
7089 max = X86_64_REGPARM_MAX;
7091 for (i = cum->regno; i < max; i++)
7093 mem = gen_rtx_MEM (Pmode,
7094 plus_constant (save_area, i * UNITS_PER_WORD));
7095 MEM_NOTRAP_P (mem) = 1;
7096 set_mem_alias_set (mem, set);
7097 emit_move_insn (mem, gen_rtx_REG (Pmode,
7098 x86_64_int_parameter_registers[i]));
7101 if (ix86_varargs_fpr_size)
7103 enum machine_mode smode;
7106 /* Now emit code to save SSE registers. The AX parameter contains the
7107 number of SSE parameter registers used to call this function, though
7108 all we actually check here is the zero/non-zero status. */
7110 label = gen_label_rtx ();
7111 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7112 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7115 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7116 we used movdqa (i.e. TImode) instead? Perhaps even better would
7117 be if we could determine the real mode of the data, via a hook
7118 into pass_stdarg. Ignore all that for now. */
7120 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7121 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7123 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7124 if (max > X86_64_SSE_REGPARM_MAX)
7125 max = X86_64_SSE_REGPARM_MAX;
7127 for (i = cum->sse_regno; i < max; ++i)
7129 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7130 mem = gen_rtx_MEM (smode, mem);
7131 MEM_NOTRAP_P (mem) = 1;
7132 set_mem_alias_set (mem, set);
7133 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7135 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
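/* Resulting layout of the register save area (illustrative, with the
   default X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8):

       bytes   0 ..  47   rdi, rsi, rdx, rcx, r8, r9  (8 bytes each)
       bytes  48 .. 175   xmm0 .. xmm7                (16 bytes each)

   matching the initial gp_offset/fp_offset values stored by
   ix86_va_start below.  */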
7143 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7145 alias_set_type set = get_varargs_alias_set ();
7148 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7152 mem = gen_rtx_MEM (Pmode,
7153 plus_constant (virtual_incoming_args_rtx,
7154 i * UNITS_PER_WORD));
7155 MEM_NOTRAP_P (mem) = 1;
7156 set_mem_alias_set (mem, set);
7158 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7159 emit_move_insn (mem, reg);
7164 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7165 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7168 CUMULATIVE_ARGS next_cum;
7171 /* This argument doesn't appear to be used anymore. Which is good,
7172 because the old code here didn't suppress rtl generation. */
7173 gcc_assert (!no_rtl);
7178 fntype = TREE_TYPE (current_function_decl);
7180 /* For varargs, we do not want to skip the dummy va_dcl argument.
7181 For stdargs, we do want to skip the last named argument. */
7183 if (stdarg_p (fntype))
7184 ix86_function_arg_advance (&next_cum, mode, type, true);
7186 if (cum->call_abi == MS_ABI)
7187 setup_incoming_varargs_ms_64 (&next_cum);
7189 setup_incoming_varargs_64 (&next_cum);
7192 /* Check whether TYPE is a va_list of kind char *. */
7195 is_va_list_char_pointer (tree type)
7199 /* For 32-bit it is always true. */
7202 canonic = ix86_canonical_va_list_type (type);
7203 return (canonic == ms_va_list_type_node
7204 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7207 /* Implement va_start. */
7210 ix86_va_start (tree valist, rtx nextarg)
7212 HOST_WIDE_INT words, n_gpr, n_fpr;
7213 tree f_gpr, f_fpr, f_ovf, f_sav;
7214 tree gpr, fpr, ovf, sav, t;
7217 /* Only the 64-bit target needs something special. */
7218 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7220 std_expand_builtin_va_start (valist, nextarg);
7224 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7225 f_fpr = DECL_CHAIN (f_gpr);
7226 f_ovf = DECL_CHAIN (f_fpr);
7227 f_sav = DECL_CHAIN (f_ovf);
7229 valist = build_simple_mem_ref (valist);
7230 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7231 /* The following should be folded into the MEM_REF offset. */
7232 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7234 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7236 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7238 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7241 /* Count number of gp and fp argument registers used. */
7242 words = crtl->args.info.words;
7243 n_gpr = crtl->args.info.regno;
7244 n_fpr = crtl->args.info.sse_regno;
7246 if (cfun->va_list_gpr_size)
7248 type = TREE_TYPE (gpr);
7249 t = build2 (MODIFY_EXPR, type,
7250 gpr, build_int_cst (type, n_gpr * 8));
7251 TREE_SIDE_EFFECTS (t) = 1;
7252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7255 if (TARGET_SSE && cfun->va_list_fpr_size)
7257 type = TREE_TYPE (fpr);
7258 t = build2 (MODIFY_EXPR, type, fpr,
7259 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7260 TREE_SIDE_EFFECTS (t) = 1;
7261 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7264 /* Find the overflow area. */
7265 type = TREE_TYPE (ovf);
7266 t = make_tree (type, crtl->args.internal_arg_pointer);
7268 t = build2 (POINTER_PLUS_EXPR, type, t,
7269 size_int (words * UNITS_PER_WORD));
7270 t = build2 (MODIFY_EXPR, type, ovf, t);
7271 TREE_SIDE_EFFECTS (t) = 1;
7272 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7274 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7276 /* Find the register save area.
7277 The function prologue saves it right above the stack frame. */
7278 type = TREE_TYPE (sav);
7279 t = make_tree (type, frame_pointer_rtx);
7280 if (!ix86_varargs_gpr_size)
7281 t = build2 (POINTER_PLUS_EXPR, type, t,
7282 size_int (-8 * X86_64_REGPARM_MAX));
7283 t = build2 (MODIFY_EXPR, type, sav, t);
7284 TREE_SIDE_EFFECTS (t) = 1;
7285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
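/* Net effect of the expansion above, in C-like pseudo-code
   (illustrative; n_gpr/n_fpr are the counts of named register
   arguments):

       ap->gp_offset = n_gpr * 8;
       ap->fp_offset = 8 * X86_64_REGPARM_MAX + n_fpr * 16;
       ap->overflow_arg_area = incoming_args + words * UNITS_PER_WORD;
       ap->reg_save_area = <area set up by setup_incoming_varargs_64>;
*/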
7289 /* Implement va_arg. */
7292 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7295 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7296 tree f_gpr, f_fpr, f_ovf, f_sav;
7297 tree gpr, fpr, ovf, sav, t;
7299 tree lab_false, lab_over = NULL_TREE;
7304 enum machine_mode nat_mode;
7305 unsigned int arg_boundary;
7307 /* Only the 64-bit target needs something special. */
7308 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7309 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7311 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7312 f_fpr = DECL_CHAIN (f_gpr);
7313 f_ovf = DECL_CHAIN (f_fpr);
7314 f_sav = DECL_CHAIN (f_ovf);
7316 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7317 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7318 valist = build_va_arg_indirect_ref (valist);
7319 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7320 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7321 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7323 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7325 type = build_pointer_type (type);
7326 size = int_size_in_bytes (type);
7327 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7329 nat_mode = type_natural_mode (type, NULL);
7338 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
7339 if (ix86_cfun_abi () == SYSV_ABI)
7346 container = construct_container (nat_mode, TYPE_MODE (type),
7347 type, 0, X86_64_REGPARM_MAX,
7348 X86_64_SSE_REGPARM_MAX, intreg,
7353 /* Pull the value out of the saved registers. */
7355 addr = create_tmp_var (ptr_type_node, "addr");
7359 int needed_intregs, needed_sseregs;
7361 tree int_addr, sse_addr;
7363 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7364 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7366 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7368 need_temp = (!REG_P (container)
7369 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7370 || TYPE_ALIGN (type) > 128));
7372 /* In case we are passing a structure, verify that it is a consecutive
7373 block in the register save area. If not, we need to do moves. */
7374 if (!need_temp && !REG_P (container))
7376 /* Verify that all registers are strictly consecutive. */
7377 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7381 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7383 rtx slot = XVECEXP (container, 0, i);
7384 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7385 || INTVAL (XEXP (slot, 1)) != i * 16)
7393 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7395 rtx slot = XVECEXP (container, 0, i);
7396 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7397 || INTVAL (XEXP (slot, 1)) != i * 8)
7409 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7410 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7413 /* First ensure that we fit completely in registers. */
7416 t = build_int_cst (TREE_TYPE (gpr),
7417 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7418 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7419 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7420 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7421 gimplify_and_add (t, pre_p);
7425 t = build_int_cst (TREE_TYPE (fpr),
7426 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7427 + X86_64_REGPARM_MAX * 8);
7428 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7429 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7430 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7431 gimplify_and_add (t, pre_p);
7434 /* Compute index to start of area used for integer regs. */
7437 /* int_addr = gpr + sav; */
7438 t = fold_convert (sizetype, gpr);
7439 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7440 gimplify_assign (int_addr, t, pre_p);
7444 /* sse_addr = fpr + sav; */
7445 t = fold_convert (sizetype, fpr);
7446 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7447 gimplify_assign (sse_addr, t, pre_p);
7451 int i, prev_size = 0;
7452 tree temp = create_tmp_var (type, "va_arg_tmp");
7455 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7456 gimplify_assign (addr, t, pre_p);
7458 for (i = 0; i < XVECLEN (container, 0); i++)
7460 rtx slot = XVECEXP (container, 0, i);
7461 rtx reg = XEXP (slot, 0);
7462 enum machine_mode mode = GET_MODE (reg);
7468 tree dest_addr, dest;
7469 int cur_size = GET_MODE_SIZE (mode);
7471 if (prev_size + cur_size > size)
7473 cur_size = size - prev_size;
7474 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7475 if (mode == BLKmode)
7478 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7479 if (mode == GET_MODE (reg))
7480 addr_type = build_pointer_type (piece_type);
7482 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7484 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7487 if (SSE_REGNO_P (REGNO (reg)))
7489 src_addr = sse_addr;
7490 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7494 src_addr = int_addr;
7495 src_offset = REGNO (reg) * 8;
7497 src_addr = fold_convert (addr_type, src_addr);
7498 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7499 size_int (src_offset));
7501 dest_addr = fold_convert (daddr_type, addr);
7502 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7503 size_int (INTVAL (XEXP (slot, 1))));
7504 if (cur_size == GET_MODE_SIZE (mode))
7506 src = build_va_arg_indirect_ref (src_addr);
7507 dest = build_va_arg_indirect_ref (dest_addr);
7509 gimplify_assign (dest, src, pre_p);
7514 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7515 3, dest_addr, src_addr,
7516 size_int (cur_size));
7517 gimplify_and_add (copy, pre_p);
7519 prev_size += cur_size;
7525 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7526 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7527 gimplify_assign (gpr, t, pre_p);
7532 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7533 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7534 gimplify_assign (fpr, t, pre_p);
7537 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7539 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7542 /* ... otherwise out of the overflow area. */
7544 /* When we align a parameter on the stack for the caller, if the
7545 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
7546 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7547 here with the caller. */
7548 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7549 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7550 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7552 /* Care for on-stack alignment if needed. */
7553 if (arg_boundary <= 64 || size == 0)
7557 HOST_WIDE_INT align = arg_boundary / 8;
7558 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7559 size_int (align - 1));
7560 t = fold_convert (sizetype, t);
7561 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7563 t = fold_convert (TREE_TYPE (ovf), t);
7566 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7567 gimplify_assign (addr, t, pre_p);
7569 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7570 size_int (rsize * UNITS_PER_WORD));
7571 gimplify_assign (unshare_expr (ovf), t, pre_p);
7574 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7576 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7577 addr = fold_convert (ptrtype, addr);
7580 addr = build_va_arg_indirect_ref (addr);
7581 return build_va_arg_indirect_ref (addr);
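/* For a scalar "int" the code gimplified above amounts to
   (illustrative pseudo-code):

       if (ap->gp_offset >= 8 * X86_64_REGPARM_MAX)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;
*/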
7584 /* Return nonzero if OPNUM's MEM should be matched
7585 in movabs* patterns. */
7588 ix86_check_movabs (rtx insn, int opnum)
7592 set = PATTERN (insn);
7593 if (GET_CODE (set) == PARALLEL)
7594 set = XVECEXP (set, 0, 0);
7595 gcc_assert (GET_CODE (set) == SET);
7596 mem = XEXP (set, opnum);
7597 while (GET_CODE (mem) == SUBREG)
7598 mem = SUBREG_REG (mem);
7599 gcc_assert (MEM_P (mem));
7600 return (volatile_ok || !MEM_VOLATILE_P (mem));
7603 /* Initialize the table of extra 80387 mathematical constants. */
7606 init_ext_80387_constants (void)
7608 static const char * cst[5] =
7610 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7611 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7612 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7613 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7614 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7618 for (i = 0; i < 5; i++)
7620 real_from_string (&ext_80387_constants_table[i], cst[i]);
7621 /* Ensure each constant is rounded to XFmode precision. */
7622 real_convert (&ext_80387_constants_table[i],
7623 XFmode, &ext_80387_constants_table[i]);
7626 ext_80387_constants_init = 1;
7629 /* Return true if the constant is something that can be loaded with
7630 a special instruction. */
7633 standard_80387_constant_p (rtx x)
7635 enum machine_mode mode = GET_MODE (x);
7639 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7642 if (x == CONST0_RTX (mode))
7644 if (x == CONST1_RTX (mode))
7647 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7649 /* For XFmode constants, try to find a special 80387 instruction when
7650 optimizing for size or on those CPUs that benefit from them. */
7652 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7656 if (! ext_80387_constants_init)
7657 init_ext_80387_constants ();
7659 for (i = 0; i < 5; i++)
7660 if (real_identical (&r, &ext_80387_constants_table[i]))
7664 /* A load of the constant -0.0 or -1.0 will be split into an
7665 fldz;fchs or fld1;fchs sequence. */
7666 if (real_isnegzero (&r))
7668 if (real_identical (&r, &dconstm1))
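/* The indices returned above map to 80387 load instructions roughly as
   follows (see standard_80387_constant_opcode):
       1 fldz   2 fld1   3 fldlg2   4 fldln2   5 fldl2e
       6 fldl2t 7 fldpi  8 fldz;fchs  9 fld1;fchs  */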
7674 /* Return the opcode of the special instruction to be used to load
the constant X. */
7678 standard_80387_constant_opcode (rtx x)
7680 switch (standard_80387_constant_p (x))
7704 /* Return the CONST_DOUBLE representing the 80387 constant that is
7705 loaded by the specified special instruction. The argument IDX
7706 matches the return value from standard_80387_constant_p. */
7709 standard_80387_constant_rtx (int idx)
7713 if (! ext_80387_constants_init)
7714 init_ext_80387_constants ();
7730 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7734 /* Return 1 if X is all 0s and 2 if X is all 1s
7735 in a supported SSE vector mode. */
7738 standard_sse_constant_p (rtx x)
7740 enum machine_mode mode = GET_MODE (x);
7742 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7744 if (vector_all_ones_operand (x, mode))
7760 /* Return the opcode of the special instruction to be used to load
the constant X. */
7764 standard_sse_constant_opcode (rtx insn, rtx x)
7766 switch (standard_sse_constant_p (x))
7769 switch (get_attr_mode (insn))
7772 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7774 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7775 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7777 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7779 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7780 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7782 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7784 return "vxorps\t%x0, %x0, %x0";
7786 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7787 return "vxorps\t%x0, %x0, %x0";
7789 return "vxorpd\t%x0, %x0, %x0";
7791 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7792 return "vxorps\t%x0, %x0, %x0";
7794 return "vpxor\t%x0, %x0, %x0";
7799 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
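/* For example (illustrative): an all-zero V4SF constant is loaded with
   "xorps %reg, %reg" (or the VEX form "vxorps"), and an all-ones V4SI
   constant with "pcmpeqd %reg, %reg", avoiding a constant-pool load in
   both cases.  */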
7806 /* Returns 1 if OP contains a symbol reference. */
7809 symbolic_reference_mentioned_p (rtx op)
7814 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7817 fmt = GET_RTX_FORMAT (GET_CODE (op));
7818 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7824 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7825 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7829 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7836 /* Return 1 if it is appropriate to emit `ret' instructions in the
7837 body of a function. Do this only if the epilogue is simple, needing a
7838 couple of insns. Prior to reloading, we can't tell how many registers
7839 must be saved, so return 0 then. Return 0 if there is no frame
7840 marker to de-allocate. */
7843 ix86_can_use_return_insn_p (void)
7845 struct ix86_frame frame;
7847 if (! reload_completed || frame_pointer_needed)
7850 /* Don't allow more than 32k bytes of popped arguments, since that's all
7851 we can do with one instruction. */
7852 if (crtl->args.pops_args
7853 && crtl->args.size >= 32768)
7856 ix86_compute_frame_layout (&frame);
7857 return frame.to_allocate == 0 && frame.padding0 == 0
7858 && (frame.nregs + frame.nsseregs) == 0;
7861 /* Value should be nonzero if functions must have frame pointers.
7862 Zero means the frame pointer need not be set up (and parms may
7863 be accessed via the stack pointer) in functions that seem suitable. */
7866 ix86_frame_pointer_required (void)
7868 /* If we accessed previous frames, then the generated code expects
7869 to be able to access the saved ebp value in our frame. */
7870 if (cfun->machine->accesses_prev_frame)
7873 /* Several x86 OSes need a frame pointer for other reasons,
7874 usually pertaining to setjmp. */
7875 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7878 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7879 the frame pointer by default. Turn it back on now if we've not
7880 got a leaf function. */
7881 if (TARGET_OMIT_LEAF_FRAME_POINTER
7882 && (!current_function_is_leaf
7883 || ix86_current_function_calls_tls_descriptor))
7892 /* Record that the current function accesses previous call frames. */
7895 ix86_setup_frame_addresses (void)
7897 cfun->machine->accesses_prev_frame = 1;
7900 #ifndef USE_HIDDEN_LINKONCE
7901 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7902 # define USE_HIDDEN_LINKONCE 1
7904 # define USE_HIDDEN_LINKONCE 0
7908 static int pic_labels_used;
7910 /* Fills in the label name that should be used for a pc thunk for
7911 the given register. */
7914 get_pc_thunk_name (char name[32], unsigned int regno)
7916 gcc_assert (!TARGET_64BIT);
7918 if (USE_HIDDEN_LINKONCE)
7919 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7921 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7925 /* This function generates code for -fpic that loads %ebx with
7926 the return address of the caller and then returns. */
7929 ix86_code_end (void)
7934 for (regno = 0; regno < 8; ++regno)
7939 if (! ((pic_labels_used >> regno) & 1))
7942 get_pc_thunk_name (name, regno);
7944 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7945 get_identifier (name),
7946 build_function_type (void_type_node, void_list_node));
7947 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7948 NULL_TREE, void_type_node);
7949 TREE_PUBLIC (decl) = 1;
7950 TREE_STATIC (decl) = 1;
7955 switch_to_section (darwin_sections[text_coal_section]);
7956 fputs ("\t.weak_definition\t", asm_out_file);
7957 assemble_name (asm_out_file, name);
7958 fputs ("\n\t.private_extern\t", asm_out_file);
7959 assemble_name (asm_out_file, name);
7960 putc ('\n', asm_out_file);
7961 ASM_OUTPUT_LABEL (asm_out_file, name);
7962 DECL_WEAK (decl) = 1;
7966 if (USE_HIDDEN_LINKONCE)
7968 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7970 targetm.asm_out.unique_section (decl, 0);
7971 switch_to_section (get_named_section (decl, NULL, 0));
7973 targetm.asm_out.globalize_label (asm_out_file, name);
7974 fputs ("\t.hidden\t", asm_out_file);
7975 assemble_name (asm_out_file, name);
7976 putc ('\n', asm_out_file);
7977 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7981 switch_to_section (text_section);
7982 ASM_OUTPUT_LABEL (asm_out_file, name);
7985 DECL_INITIAL (decl) = make_node (BLOCK);
7986 current_function_decl = decl;
7987 init_function_start (decl);
7988 first_function_block_is_cold = false;
7989 /* Make sure unwind info is emitted for the thunk if needed. */
7990 final_start_function (emit_barrier (), asm_out_file, 1);
7992 xops[0] = gen_rtx_REG (Pmode, regno);
7993 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7994 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7995 output_asm_insn ("ret", xops);
7996 final_end_function ();
7997 init_insn_lengths ();
7998 free_after_compilation (cfun);
8000 current_function_decl = NULL;
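/* The emitted thunk is simply (shown for %ebx; illustrative):

       __i686.get_pc_thunk.bx:
               movl (%esp), %ebx
               ret

   so "call __i686.get_pc_thunk.bx" leaves the address of the
   instruction following the call in %ebx.  */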
8004 /* Emit code for the SET_GOT patterns. */
8007 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8013 if (TARGET_VXWORKS_RTP && flag_pic)
8015 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8016 xops[2] = gen_rtx_MEM (Pmode,
8017 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8018 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8020 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8021 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8022 an unadorned address. */
8023 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8024 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8025 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8029 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8031 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8033 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8036 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8039 output_asm_insn ("call\t%a2", xops);
8040 #ifdef DWARF2_UNWIND_INFO
8041 /* The call to the next label acts as a push. */
8042 if (dwarf2out_do_frame ())
8046 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8047 gen_rtx_PLUS (Pmode,
8050 RTX_FRAME_RELATED_P (insn) = 1;
8051 dwarf2out_frame_debug (insn, true);
8058 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8059 is what will be referenced by the Mach-O PIC subsystem. */
8061 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8064 targetm.asm_out.internal_label (asm_out_file, "L",
8065 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8069 output_asm_insn ("pop%z0\t%0", xops);
8070 #ifdef DWARF2_UNWIND_INFO
8071 /* The pop is a pop and clobbers dest, but doesn't restore it
8072 for unwind info purposes. */
8073 if (dwarf2out_do_frame ())
8077 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8078 dwarf2out_frame_debug (insn, true);
8079 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8080 gen_rtx_PLUS (Pmode,
8083 RTX_FRAME_RELATED_P (insn) = 1;
8084 dwarf2out_frame_debug (insn, true);
8093 get_pc_thunk_name (name, REGNO (dest));
8094 pic_labels_used |= 1 << REGNO (dest);
8096 #ifdef DWARF2_UNWIND_INFO
8097 /* Ensure all queued register saves are flushed before the call. */
8099 if (dwarf2out_do_frame ())
8103 insn = emit_barrier ();
8105 dwarf2out_frame_debug (insn, false);
8108 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8109 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8110 output_asm_insn ("call\t%X2", xops);
8111 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8112 is what will be referenced by the Mach-O PIC subsystem. */
8115 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8117 targetm.asm_out.internal_label (asm_out_file, "L",
8118 CODE_LABEL_NUMBER (label));
8125 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8126 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8128 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
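/* For illustration only (a sketch): with -fpic on a 32-bit target and
   !TARGET_DEEP_BRANCH_PREDICTION, the code above emits the classic
   call/pop/add idiom (the label name is illustrative):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
*/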
8133 /* Generate a "push" pattern for input ARG. */
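/* For illustration only (a sketch): on a 32-bit target the SET built
   below amounts to RTL of the form

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI arg))

   i.e. a plain "pushl" once the pattern is matched.  */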
8138 if (ix86_cfa_state->reg == stack_pointer_rtx)
8139 ix86_cfa_state->offset += UNITS_PER_WORD;
8141 return gen_rtx_SET (VOIDmode,
8143 gen_rtx_PRE_DEC (Pmode,
8144 stack_pointer_rtx)),
8148 /* Return >= 0 if there is an unused call-clobbered register available
8149 for the entire function. */
8152 ix86_select_alt_pic_regnum (void)
8154 if (current_function_is_leaf && !crtl->profile
8155 && !ix86_current_function_calls_tls_descriptor)
8158 /* Can't use the same register for both PIC and DRAP. */
8160 drap = REGNO (crtl->drap_reg);
8163 for (i = 2; i >= 0; --i)
8164 if (i != drap && !df_regs_ever_live_p (i))
8168 return INVALID_REGNUM;
8171 /* Return 1 if we need to save REGNO. */
8173 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8175 if (pic_offset_table_rtx
8176 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8177 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8179 || crtl->calls_eh_return
8180 || crtl->uses_const_pool))
8182 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8187 if (crtl->calls_eh_return && maybe_eh_return)
8192 unsigned test = EH_RETURN_DATA_REGNO (i);
8193 if (test == INVALID_REGNUM)
8200 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8203 return (df_regs_ever_live_p (regno)
8204 && !call_used_regs[regno]
8205 && !fixed_regs[regno]
8206 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8209 /* Return number of saved general purpose registers. */
8212 ix86_nsaved_regs (void)
8217 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8218 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8223 /* Return number of saved SSE registers. */
8226 ix86_nsaved_sseregs (void)
8231 if (ix86_cfun_abi () != MS_ABI)
8233 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8234 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8239 /* Given FROM and TO register numbers, say whether this elimination is
8240 allowed. If stack alignment is needed, we can only replace argument
8241 pointer with hard frame pointer, or replace frame pointer with stack
8242 pointer. Otherwise, frame pointer elimination is automatically
8243 handled and all other eliminations are valid. */
8246 ix86_can_eliminate (const int from, const int to)
8248 if (stack_realign_fp)
8249 return ((from == ARG_POINTER_REGNUM
8250 && to == HARD_FRAME_POINTER_REGNUM)
8251 || (from == FRAME_POINTER_REGNUM
8252 && to == STACK_POINTER_REGNUM));
8254 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8257 /* Return the offset between two registers, one to be eliminated, and the other
8258 its replacement, at the start of a routine. */
8261 ix86_initial_elimination_offset (int from, int to)
8263 struct ix86_frame frame;
8264 ix86_compute_frame_layout (&frame);
8266 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8267 return frame.hard_frame_pointer_offset;
8268 else if (from == FRAME_POINTER_REGNUM
8269 && to == HARD_FRAME_POINTER_REGNUM)
8270 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8273 gcc_assert (to == STACK_POINTER_REGNUM);
8275 if (from == ARG_POINTER_REGNUM)
8276 return frame.stack_pointer_offset;
8278 gcc_assert (from == FRAME_POINTER_REGNUM);
8279 return frame.stack_pointer_offset - frame.frame_pointer_offset;
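/* A worked example with hypothetical numbers: if
   frame.stack_pointer_offset == 56 and frame.frame_pointer_offset == 16,
   eliminating FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM above
   yields 56 - 16 == 40 bytes.  */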
8283 /* In a dynamically-aligned function, we can't know the offset from
8284 stack pointer to frame pointer, so we must ensure that setjmp
8285 eliminates fp against the hard fp (%ebp) rather than trying to
8286 index from %esp up to the top of the frame across a gap that is
8287 of unknown (at compile-time) size. */
8289 ix86_builtin_setjmp_frame_value (void)
8291 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8294 /* Fill the ix86_frame structure with information about the frame of the current function. */
8297 ix86_compute_frame_layout (struct ix86_frame *frame)
8299 unsigned int stack_alignment_needed;
8300 HOST_WIDE_INT offset;
8301 unsigned int preferred_alignment;
8302 HOST_WIDE_INT size = get_frame_size ();
8304 frame->nregs = ix86_nsaved_regs ();
8305 frame->nsseregs = ix86_nsaved_sseregs ();
8307 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8308 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8310 /* The MS ABI seems to require the stack alignment to always be 16, except for
8311 function prologues and leaf functions. */
8312 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8313 && (!current_function_is_leaf || cfun->calls_alloca != 0
8314 || ix86_current_function_calls_tls_descriptor))
8316 preferred_alignment = 16;
8317 stack_alignment_needed = 16;
8318 crtl->preferred_stack_boundary = 128;
8319 crtl->stack_alignment_needed = 128;
8322 gcc_assert (!size || stack_alignment_needed);
8323 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8324 gcc_assert (preferred_alignment <= stack_alignment_needed);
8326 /* During a reload iteration the number of registers saved can change.
8327 Recompute the value as needed. Do not recompute when the number of registers
8328 didn't change, as reload does multiple calls to the function and does not
8329 expect the decision to change within a single iteration. */
8330 if (!optimize_function_for_size_p (cfun)
8331 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8333 int count = frame->nregs;
8334 struct cgraph_node *node = cgraph_node (current_function_decl);
8336 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8337 /* The fast prologue uses move instead of push to save registers. This
8338 is significantly longer, but also executes faster as modern hardware
8339 can execute the moves in parallel, but can't do that for push/pop.
8341 Be careful about choosing which prologue to emit: when the function takes
8342 many instructions to execute, we may use the slow version, as well as in
8343 case the function is known to be outside a hot spot (this is known with
8344 feedback only). Weight the size of the function by the number of registers
8345 to save, as it is cheap to use one or two push instructions but very
8346 slow to use many of them. */
8348 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8349 if (node->frequency < NODE_FREQUENCY_NORMAL
8350 || (flag_branch_probabilities
8351 && node->frequency < NODE_FREQUENCY_HOT))
8352 cfun->machine->use_fast_prologue_epilogue = false;
8354 cfun->machine->use_fast_prologue_epilogue
8355 = !expensive_function_p (count);
8357 if (TARGET_PROLOGUE_USING_MOVE
8358 && cfun->machine->use_fast_prologue_epilogue)
8359 frame->save_regs_using_mov = true;
8361 frame->save_regs_using_mov = false;
8363 /* If static stack checking is enabled and done with probes, the registers
8364 need to be saved before allocating the frame. */
8365 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8366 frame->save_regs_using_mov = false;
8368 /* Skip return address. */
8369 offset = UNITS_PER_WORD;
8371 /* Skip pushed static chain. */
8372 if (ix86_static_chain_on_stack)
8373 offset += UNITS_PER_WORD;
8375 /* Skip saved base pointer. */
8376 if (frame_pointer_needed)
8377 offset += UNITS_PER_WORD;
8379 frame->hard_frame_pointer_offset = offset;
8381 /* Set offset to aligned, because the realigned frame starts from here. */
8383 if (stack_realign_fp)
8384 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8386 /* Register save area */
8387 offset += frame->nregs * UNITS_PER_WORD;
8389 /* Align SSE reg save area. */
8390 if (frame->nsseregs)
8391 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8393 frame->padding0 = 0;
8395 /* SSE register save area. */
8396 offset += frame->padding0 + frame->nsseregs * 16;
8399 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8400 offset += frame->va_arg_size;
8402 /* Align start of frame for local function. */
8403 frame->padding1 = ((offset + stack_alignment_needed - 1)
8404 & -stack_alignment_needed) - offset;
8406 offset += frame->padding1;
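/* A worked example of the rounding above, with hypothetical numbers:
   for offset == 20 and stack_alignment_needed == 16,
   padding1 == ((20 + 16 - 1) & -16) - 20 == 32 - 20 == 12 bytes.  */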
8408 /* Frame pointer points here. */
8409 frame->frame_pointer_offset = offset;
8413 /* Add the outgoing arguments area. It can be skipped if we eliminated
8414 all the function calls as dead code.
8415 Skipping is however impossible when the function calls alloca. The alloca
8416 expander assumes that the last crtl->outgoing_args_size bytes
8417 of the stack frame are unused. */
8418 if (ACCUMULATE_OUTGOING_ARGS
8419 && (!current_function_is_leaf || cfun->calls_alloca
8420 || ix86_current_function_calls_tls_descriptor))
8422 offset += crtl->outgoing_args_size;
8423 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8426 frame->outgoing_arguments_size = 0;
8428 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
8430 if (!current_function_is_leaf || cfun->calls_alloca
8431 || ix86_current_function_calls_tls_descriptor)
8432 frame->padding2 = ((offset + preferred_alignment - 1)
8433 & -preferred_alignment) - offset;
8435 frame->padding2 = 0;
8437 offset += frame->padding2;
8439 /* We've reached the end of the stack frame. */
8440 frame->stack_pointer_offset = offset;
8442 /* The size the prologue needs to allocate. */
8443 frame->to_allocate =
8444 (size + frame->padding1 + frame->padding2
8445 + frame->outgoing_arguments_size + frame->va_arg_size);
8447 if ((!frame->to_allocate && frame->nregs <= 1)
8448 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8449 frame->save_regs_using_mov = false;
8451 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8452 && current_function_sp_is_unchanging
8453 && current_function_is_leaf
8454 && !ix86_current_function_calls_tls_descriptor)
8456 frame->red_zone_size = frame->to_allocate;
8457 if (frame->save_regs_using_mov)
8458 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8459 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8460 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8463 frame->red_zone_size = 0;
8464 frame->to_allocate -= frame->red_zone_size;
8465 frame->stack_pointer_offset -= frame->red_zone_size;
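/* A sketch of one possible resulting 32-bit layout (hypothetical:
   frame pointer needed, two saved regs, no SSE saves, 40 bytes of
   locals, 16-byte stack_alignment_needed, leaf function, no red zone):

	offset  4  past the return address
	offset  8  past the saved %ebp	   (hard_frame_pointer_offset == 8)
	offset 16  past two saved registers
	offset 16  == frame_pointer_offset (padding1 == 0)
	offset 56  past 40 bytes of locals == stack_pointer_offset

   and to_allocate == 40, since the pushes account for the rest.  */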
8468 /* Emit code to save registers in the prologue. */
8471 ix86_emit_save_regs (void)
8476 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8477 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8479 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8480 RTX_FRAME_RELATED_P (insn) = 1;
8484 /* Emit code to save registers using MOV insns. The first register
8485 is stored at POINTER + OFFSET. */
8487 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8492 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8493 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8495 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8497 gen_rtx_REG (Pmode, regno));
8498 RTX_FRAME_RELATED_P (insn) = 1;
8499 offset += UNITS_PER_WORD;
8503 /* Emit code to save SSE registers using MOV insns. The first register
8504 is stored at POINTER + OFFSET. */
8506 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8513 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8515 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8516 set_mem_align (mem, 128);
8517 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8518 RTX_FRAME_RELATED_P (insn) = 1;
8523 static GTY(()) rtx queued_cfa_restores;
8525 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8526 manipulation insn. Don't add it if the previously
8527 saved value will be left untouched within the stack red-zone till return,
8528 as unwinders can find the same value in the register and on the stack. */
8532 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8535 && !TARGET_64BIT_MS_ABI
8536 && red_offset + RED_ZONE_SIZE >= 0
8537 && crtl->args.pops_args < 65536)
8542 add_reg_note (insn, REG_CFA_RESTORE, reg);
8543 RTX_FRAME_RELATED_P (insn) = 1;
8547 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8550 /* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
8553 ix86_add_queued_cfa_restore_notes (rtx insn)
8556 if (!queued_cfa_restores)
8558 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8560 XEXP (last, 1) = REG_NOTES (insn);
8561 REG_NOTES (insn) = queued_cfa_restores;
8562 queued_cfa_restores = NULL_RTX;
8563 RTX_FRAME_RELATED_P (insn) = 1;
8566 /* Expand a prologue or epilogue stack adjustment.
8567 The pattern exists to put a dependency on all ebp-based memory accesses.
8568 STYLE should be negative if instructions should be marked as frame related,
8569 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
8573 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8574 int style, bool set_cfa)
8579 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8580 else if (x86_64_immediate_operand (offset, DImode))
8581 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8585 /* r11 is used by indirect sibcall return as well, set before the
8586 epilogue and used after the epilogue. */
8588 tmp = gen_rtx_REG (DImode, R11_REG);
8591 gcc_assert (src != hard_frame_pointer_rtx
8592 && dest != hard_frame_pointer_rtx);
8593 tmp = hard_frame_pointer_rtx;
8595 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8597 RTX_FRAME_RELATED_P (insn) = 1;
8598 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8603 ix86_add_queued_cfa_restore_notes (insn);
8609 gcc_assert (ix86_cfa_state->reg == src);
8610 ix86_cfa_state->offset += INTVAL (offset);
8611 ix86_cfa_state->reg = dest;
8613 r = gen_rtx_PLUS (Pmode, src, offset);
8614 r = gen_rtx_SET (VOIDmode, dest, r);
8615 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8616 RTX_FRAME_RELATED_P (insn) = 1;
8619 RTX_FRAME_RELATED_P (insn) = 1;
8622 /* Find an available register to be used as a dynamic realign argument
8623 pointer register. Such a register will be written in the prologue and
8624 used at the beginning of the body, so it must not be
8625 1. parameter passing register.
8627 We reuse the static-chain register if it is available. Otherwise, we
8628 use DI for i386 and R13 for x86-64. We chose R13 since it has a longer encoding.
8631 Return: the regno of the chosen register. */
8634 find_drap_reg (void)
8636 tree decl = cfun->decl;
8640 /* Use R13 for a nested function or a function that needs a static chain.
8641 Since a function with a tail call may use any caller-saved
8642 registers in the epilogue, DRAP must not use a caller-saved
8643 register in such a case. */
8644 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8651 /* Use DI for a nested function or a function that needs a static chain.
8652 Since a function with a tail call may use any caller-saved
8653 registers in the epilogue, DRAP must not use a caller-saved
8654 register in such a case. */
8655 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8658 /* Reuse static chain register if it isn't used for parameter
8660 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8661 && !lookup_attribute ("fastcall",
8662 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8663 && !lookup_attribute ("thiscall",
8664 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8671 /* Return minimum incoming stack alignment. */
8674 ix86_minimum_incoming_stack_boundary (bool sibcall)
8676 unsigned int incoming_stack_boundary;
8678 /* Prefer the one specified at command line. */
8679 if (ix86_user_incoming_stack_boundary)
8680 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8681 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8682 if -mstackrealign is used, this isn't used for a sibcall check, and the
8683 estimated stack alignment is 128 bits. */
8686 && ix86_force_align_arg_pointer
8687 && crtl->stack_alignment_estimated == 128)
8688 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8690 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8692 /* Incoming stack alignment can be changed on individual functions
8693 via force_align_arg_pointer attribute. We use the smallest
8694 incoming stack boundary. */
8695 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8696 && lookup_attribute (ix86_force_align_arg_pointer_string,
8697 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8698 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8700 /* The incoming stack frame has to be aligned at least at
8701 parm_stack_boundary. */
8702 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8703 incoming_stack_boundary = crtl->parm_stack_boundary;
8705 /* The stack at the entrance of main is aligned by the runtime. We use the
8706 smallest incoming stack boundary. */
8707 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8708 && DECL_NAME (current_function_decl)
8709 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8710 && DECL_FILE_SCOPE_P (current_function_decl))
8711 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8713 return incoming_stack_boundary;
8716 /* Update incoming stack boundary and estimated stack alignment. */
8719 ix86_update_stack_boundary (void)
8721 ix86_incoming_stack_boundary
8722 = ix86_minimum_incoming_stack_boundary (false);
8724 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
8728 && crtl->stack_alignment_estimated < 128)
8729 crtl->stack_alignment_estimated = 128;
8732 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8733 needed or an rtx for DRAP otherwise. */
8736 ix86_get_drap_rtx (void)
8738 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8739 crtl->need_drap = true;
8741 if (stack_realign_drap)
8743 /* Assign DRAP to vDRAP and return vDRAP. */
8744 unsigned int regno = find_drap_reg ();
8749 arg_ptr = gen_rtx_REG (Pmode, regno);
8750 crtl->drap_reg = arg_ptr;
8753 drap_vreg = copy_to_reg (arg_ptr);
8757 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8760 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8761 RTX_FRAME_RELATED_P (insn) = 1;
8769 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8772 ix86_internal_arg_pointer (void)
8774 return virtual_incoming_args_rtx;
8777 struct scratch_reg {
8782 /* Return a short-lived scratch register for use on function entry.
8783 In 32-bit mode, it is valid only after the registers are saved
8784 in the prologue. This register must be released by means of
8785 release_scratch_register_on_entry once it is dead. */
8788 get_scratch_register_on_entry (struct scratch_reg *sr)
8796 /* We always use R11 in 64-bit mode. */
8801 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8803 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8804 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8805 int regparm = ix86_function_regparm (fntype, decl);
8807 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8809 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8810 for the static chain register. */
8811 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8812 && drap_regno != AX_REG)
8814 else if (regparm < 2 && drap_regno != DX_REG)
8816 /* ecx is the static chain register. */
8817 else if (regparm < 3 && !fastcall_p && !static_chain_p
8818 && drap_regno != CX_REG)
8820 else if (ix86_save_reg (BX_REG, true))
8822 /* esi is the static chain register. */
8823 else if (!(regparm == 3 && static_chain_p)
8824 && ix86_save_reg (SI_REG, true))
8826 else if (ix86_save_reg (DI_REG, true))
8830 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8835 sr->reg = gen_rtx_REG (Pmode, regno);
8838 rtx insn = emit_insn (gen_push (sr->reg));
8839 RTX_FRAME_RELATED_P (insn) = 1;
8843 /* Release a scratch register obtained from the preceding function. */
8846 release_scratch_register_on_entry (struct scratch_reg *sr)
8850 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
8852 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8853 RTX_FRAME_RELATED_P (insn) = 1;
8854 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
8855 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
8856 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
8860 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
8862 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
8865 ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
8867 /* We skip the probe for the first interval + a small dope of 4 words and
8868 probe that many bytes past the specified size to maintain a protection
8869 area at the bottom of the stack. */
8870 const int dope = 4 * UNITS_PER_WORD;
8871 rtx size_rtx = GEN_INT (size);
8873 /* See if we have a constant small number of probes to generate. If so,
8874 that's the easy case. The run-time loop is made up of 11 insns in the
8875 generic case while the compile-time loop is made up of 3+2*(n-1) insns
8876 for n # of intervals. */
8877 if (size <= 5 * PROBE_INTERVAL)
8879 HOST_WIDE_INT i, adjust;
8880 bool first_probe = true;
8882 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
8883 values of N from 1 until it exceeds SIZE. If only one probe is
8884 needed, this will not generate any code. Then adjust and probe
8885 to PROBE_INTERVAL + SIZE. */
8886 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
8890 adjust = 2 * PROBE_INTERVAL + dope;
8891 first_probe = false;
8894 adjust = PROBE_INTERVAL;
8896 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8897 plus_constant (stack_pointer_rtx, -adjust)));
8898 emit_stack_probe (stack_pointer_rtx);
8902 adjust = size + PROBE_INTERVAL + dope;
8904 adjust = size + PROBE_INTERVAL - i;
8906 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8907 plus_constant (stack_pointer_rtx, -adjust)));
8908 emit_stack_probe (stack_pointer_rtx);
8910 /* Adjust back to account for the additional first interval. */
8911 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8912 plus_constant (stack_pointer_rtx,
8913 PROBE_INTERVAL + dope)));
8916 /* Otherwise, do the same as above, but in a loop. Note that we must be
8917 extra careful with variables wrapping around because we might be at
8918 the very top (or the very bottom) of the address space and we have
8919 to be able to handle this case properly; in particular, we use an
8920 equality test for the loop condition. */
8923 HOST_WIDE_INT rounded_size;
8924 struct scratch_reg sr;
8926 get_scratch_register_on_entry (&sr);
8929 /* Step 1: round SIZE to the previous multiple of the interval. */
8931 rounded_size = size & -PROBE_INTERVAL;
8934 /* Step 2: compute initial and final value of the loop counter. */
8936 /* SP = SP_0 + PROBE_INTERVAL. */
8937 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8938 plus_constant (stack_pointer_rtx,
8939 - (PROBE_INTERVAL + dope))));
8941 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
8942 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8943 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
8944 gen_rtx_PLUS (Pmode, sr.reg,
8945 stack_pointer_rtx)));
8950 while (SP != LAST_ADDR)
8952 SP = SP + PROBE_INTERVAL
8956 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
8957 values of N from 1 until it is equal to ROUNDED_SIZE. */
8959 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
8962 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
8963 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
8965 if (size != rounded_size)
8967 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8968 plus_constant (stack_pointer_rtx,
8969 rounded_size - size)));
8970 emit_stack_probe (stack_pointer_rtx);
8973 /* Adjust back to account for the additional first interval. */
8974 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8975 plus_constant (stack_pointer_rtx,
8976 PROBE_INTERVAL + dope)));
8978 release_scratch_register_on_entry (&sr);
8981 gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
8983 /* Make sure nothing is scheduled before we are done. */
8984 emit_insn (gen_blockage ());
8987 /* Adjust the stack pointer up to REG while probing it. */
8990 output_adjust_stack_and_probe (rtx reg)
8992 static int labelno = 0;
8993 char loop_lab[32], end_lab[32];
8996 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
8997 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
8999 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9001 /* Jump to END_LAB if SP == LAST_ADDR. */
9002 xops[0] = stack_pointer_rtx;
9004 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9005 fputs ("\tje\t", asm_out_file);
9006 assemble_name_raw (asm_out_file, end_lab);
9007 fputc ('\n', asm_out_file);
9009 /* SP = SP + PROBE_INTERVAL. */
9010 xops[1] = GEN_INT (PROBE_INTERVAL);
9011 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9014 xops[1] = const0_rtx;
9015 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9017 fprintf (asm_out_file, "\tjmp\t");
9018 assemble_name_raw (asm_out_file, loop_lab);
9019 fputc ('\n', asm_out_file);
9021 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
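/* For illustration only (a sketch, assuming PROBE_INTERVAL == 4096, a
   32-bit target and illustrative register/label names): the routine
   above prints a loop of the form

   .LPSRL0:
	cmpl	%eax, %esp
	je	.LPSRE0
	subl	$4096, %esp
	orl	$0, (%esp)
	jmp	.LPSRL0
   .LPSRE0:
*/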
9026 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9027 inclusive. These are offsets from the current stack pointer. */
9030 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9032 /* See if we have a constant small number of probes to generate. If so,
9033 that's the easy case. The run-time loop is made up of 7 insns in the
9034 generic case while the compile-time loop is made up of n insns for n # of intervals. */
9036 if (size <= 7 * PROBE_INTERVAL)
9040 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9041 it exceeds SIZE. If only one probe is needed, this will not
9042 generate any code. Then probe at FIRST + SIZE. */
9043 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9044 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9046 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
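/* For illustration only (a sketch, assuming first == 0,
   size == 3 * 4096 and PROBE_INTERVAL == 4096): the loop above unrolls
   into three probes, roughly

	orl	$0, -4096(%esp)
	orl	$0, -8192(%esp)
	orl	$0, -12288(%esp)
*/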
9049 /* Otherwise, do the same as above, but in a loop. Note that we must be
9050 extra careful with variables wrapping around because we might be at
9051 the very top (or the very bottom) of the address space and we have
9052 to be able to handle this case properly; in particular, we use an
9053 equality test for the loop condition. */
9056 HOST_WIDE_INT rounded_size, last;
9057 struct scratch_reg sr;
9059 get_scratch_register_on_entry (&sr);
9062 /* Step 1: round SIZE to the previous multiple of the interval. */
9064 rounded_size = size & -PROBE_INTERVAL;
9067 /* Step 2: compute initial and final value of the loop counter. */
9069 /* TEST_OFFSET = FIRST. */
9070 emit_move_insn (sr.reg, GEN_INT (-first));
9072 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9073 last = first + rounded_size;
9078 while (TEST_ADDR != LAST_ADDR)
9080 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9084 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9085 until it is equal to ROUNDED_SIZE. */
9087 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9090 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9091 that SIZE is equal to ROUNDED_SIZE. */
9093 if (size != rounded_size)
9094 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9097 rounded_size - size));
9099 release_scratch_register_on_entry (&sr);
9102 /* Make sure nothing is scheduled before we are done. */
9103 emit_insn (gen_blockage ());
9106 /* Probe a range of stack addresses from REG to END, inclusive. These are
9107 offsets from the current stack pointer. */
9110 output_probe_stack_range (rtx reg, rtx end)
9112 static int labelno = 0;
9113 char loop_lab[32], end_lab[32];
9116 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9117 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9119 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9121 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9124 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9125 fputs ("\tje\t", asm_out_file);
9126 assemble_name_raw (asm_out_file, end_lab);
9127 fputc ('\n', asm_out_file);
9129 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9130 xops[1] = GEN_INT (PROBE_INTERVAL);
9131 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9133 /* Probe at TEST_ADDR. */
9134 xops[0] = stack_pointer_rtx;
9136 xops[2] = const0_rtx;
9137 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9139 fprintf (asm_out_file, "\tjmp\t");
9140 assemble_name_raw (asm_out_file, loop_lab);
9141 fputc ('\n', asm_out_file);
9143 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
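/* For illustration only (a sketch, assuming PROBE_INTERVAL == 4096, a
   32-bit target and an illustrative scratch register %eax holding the
   negated test offset, with labels also illustrative):

   .LPSRL1:
	cmpl	$-20480, %eax
	je	.LPSRE1
	subl	$4096, %eax
	orl	$0, (%esp,%eax)
	jmp	.LPSRL1
   .LPSRE1:
*/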
9148 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9149 to be generated in correct form. */
9151 ix86_finalize_stack_realign_flags (void)
9153 /* Check if stack realignment is really needed after reload, and
9154 store the result in cfun. */
9155 unsigned int incoming_stack_boundary
9156 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9157 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9158 unsigned int stack_realign = (incoming_stack_boundary
9159 < (current_function_is_leaf
9160 ? crtl->max_used_stack_slot_alignment
9161 : crtl->stack_alignment_needed));
9163 if (crtl->stack_realign_finalized)
9165 /* After stack_realign_needed is finalized, we can no longer update it. */
9167 gcc_assert (crtl->stack_realign_needed == stack_realign);
9171 crtl->stack_realign_needed = stack_realign;
9172 crtl->stack_realign_finalized = true;
9176 /* Expand the prologue into a bunch of separate insns. */
9179 ix86_expand_prologue (void)
9183 struct ix86_frame frame;
9184 HOST_WIDE_INT allocate;
9185 int gen_frame_pointer = frame_pointer_needed;
9187 ix86_finalize_stack_realign_flags ();
9189 /* DRAP should not coexist with stack_realign_fp */
9190 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9192 /* Initialize CFA state for before the prologue. */
9193 ix86_cfa_state->reg = stack_pointer_rtx;
9194 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
9196 ix86_compute_frame_layout (&frame);
9198 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9202 /* Make sure the function starts with
9203 8b ff movl.s %edi,%edi (emitted by ix86_asm_output_function_label)
9204 55 push %ebp
9205 8b ec movl.s %esp,%ebp
9207 This matches the hookable function prologue in Win32 API
9208 functions in Microsoft Windows XP Service Pack 2 and newer.
9209 Wine uses this to enable Windows apps to hook the Win32 API
9210 functions provided by Wine. */
9211 push = emit_insn (gen_push (hard_frame_pointer_rtx));
9212 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
9213 stack_pointer_rtx));
9215 if (frame_pointer_needed && !(crtl->drap_reg
9216 && crtl->stack_realign_needed))
9218 /* The push %ebp and movl.s %esp, %ebp already set up
9219 the frame pointer. No need to do this again. */
9220 gen_frame_pointer = 0;
9221 RTX_FRAME_RELATED_P (push) = 1;
9222 RTX_FRAME_RELATED_P (mov) = 1;
9223 if (ix86_cfa_state->reg == stack_pointer_rtx)
9224 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9227 /* If the frame pointer is not needed, pop %ebp again. This
9228 could be optimized for cases where ebp needs to be backed up
9229 for some other reason. If stack realignment is needed, pop
9230 the base pointer again, align the stack, and later regenerate
9231 the frame pointer setup. The frame pointer generated by the
9232 hook prologue is not aligned, so it can't be used. */
9233 insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9236 /* The first insn of a function that accepts its static chain on the
9237 stack is to push the register that would be filled in by a direct
9238 call. This insn will be skipped by the trampoline. */
9239 if (ix86_static_chain_on_stack)
9243 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9244 emit_insn (gen_blockage ());
9246 /* We don't want to interpret this push insn as a register save,
9247 only as a stack adjustment. The real copy of the register as
9248 a save will be done later, if needed. */
9249 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9250 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9251 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9252 RTX_FRAME_RELATED_P (insn) = 1;
9255 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9256 DRAP is needed and stack realignment is really needed after reload. */
9257 if (crtl->drap_reg && crtl->stack_realign_needed)
9260 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9261 int param_ptr_offset = UNITS_PER_WORD;
9263 if (ix86_static_chain_on_stack)
9264 param_ptr_offset += UNITS_PER_WORD;
9265 if (!call_used_regs[REGNO (crtl->drap_reg)])
9266 param_ptr_offset += UNITS_PER_WORD;
9268 gcc_assert (stack_realign_drap);
9270 /* Grab the argument pointer. */
9271 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
9274 /* Only need to push the parameter pointer reg if it is caller saved. */
9276 if (!call_used_regs[REGNO (crtl->drap_reg)])
9278 /* Push arg pointer reg */
9279 insn = emit_insn (gen_push (y));
9280 RTX_FRAME_RELATED_P (insn) = 1;
9283 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
9284 RTX_FRAME_RELATED_P (insn) = 1;
9285 ix86_cfa_state->reg = crtl->drap_reg;
9287 /* Align the stack. */
9288 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9290 GEN_INT (-align_bytes)));
9291 RTX_FRAME_RELATED_P (insn) = 1;
9293 /* Replicate the return address on the stack so that return
9294 address can be reached via (argp - 1) slot. This is needed
9295 to implement macro RETURN_ADDR_RTX and intrinsic function
9296 expand_builtin_return_addr etc. */
9298 x = gen_frame_mem (Pmode,
9299 plus_constant (x, -UNITS_PER_WORD));
9300 insn = emit_insn (gen_push (x));
9301 RTX_FRAME_RELATED_P (insn) = 1;
9304 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9305 slower on all targets. Also sdb doesn't like it. */
9307 if (gen_frame_pointer)
9309 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9310 RTX_FRAME_RELATED_P (insn) = 1;
9312 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9313 RTX_FRAME_RELATED_P (insn) = 1;
9315 if (ix86_cfa_state->reg == stack_pointer_rtx)
9316 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9319 if (stack_realign_fp)
9321 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9322 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9324 /* Align the stack. */
9325 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9327 GEN_INT (-align_bytes)));
9328 RTX_FRAME_RELATED_P (insn) = 1;
9331 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
9333 if (!frame.save_regs_using_mov)
9334 ix86_emit_save_regs ();
9336 allocate += frame.nregs * UNITS_PER_WORD;
9338 /* The stack has already been decremented by the instruction calling us
9339 so we need to probe unconditionally to preserve the protection area. */
9340 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9342 /* We expect the registers to be saved when probes are used. */
9343 gcc_assert (!frame.save_regs_using_mov);
9345 if (STACK_CHECK_MOVING_SP)
9347 ix86_adjust_stack_and_probe (allocate);
9352 HOST_WIDE_INT size = allocate;
9354 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9355 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9357 if (TARGET_STACK_PROBE)
9358 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9360 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9364 /* When using the red zone we may start saving registers before allocating
9365 the stack frame, saving one cycle of the prologue. However, we avoid
9366 doing this if we are going to have to probe the stack, since
9367 at least on x86_64 the stack probe can turn into a call that clobbers
9368 a red zone location. */
9369 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
9370 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
9371 ix86_emit_save_regs_using_mov ((frame_pointer_needed
9372 && !crtl->stack_realign_needed)
9373 ? hard_frame_pointer_rtx
9374 : stack_pointer_rtx,
9375 -frame.nregs * UNITS_PER_WORD);
9379 else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
9380 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9381 GEN_INT (-allocate), -1,
9382 ix86_cfa_state->reg == stack_pointer_rtx);
9385 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9389 if (cfun->machine->call_abi == MS_ABI)
9392 eax_live = ix86_eax_live_at_start_p ();
9396 emit_insn (gen_push (eax));
9397 allocate -= UNITS_PER_WORD;
9400 emit_move_insn (eax, GEN_INT (allocate));
9402 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9404 if (ix86_cfa_state->reg == stack_pointer_rtx)
9406 ix86_cfa_state->offset += allocate;
9407 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9408 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9409 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9410 RTX_FRAME_RELATED_P (insn) = 1;
9415 if (frame_pointer_needed)
9416 t = plus_constant (hard_frame_pointer_rtx,
9419 - frame.nregs * UNITS_PER_WORD);
9421 t = plus_constant (stack_pointer_rtx, allocate);
9422 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
9426 if (frame.save_regs_using_mov
9427 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
9428 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
9430 if (!frame_pointer_needed
9431 || !(frame.to_allocate + frame.padding0)
9432 || crtl->stack_realign_needed)
9433 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
9435 + frame.nsseregs * 16 + frame.padding0);
9437 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
9438 -frame.nregs * UNITS_PER_WORD);
9440 if (!frame_pointer_needed
9441 || !(frame.to_allocate + frame.padding0)
9442 || crtl->stack_realign_needed)
9443 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
9446 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
9447 - frame.nregs * UNITS_PER_WORD
9448 - frame.nsseregs * 16
9451 pic_reg_used = false;
9452 if (pic_offset_table_rtx
9453 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9456 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9458 if (alt_pic_reg_used != INVALID_REGNUM)
9459 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9461 pic_reg_used = true;
9468 if (ix86_cmodel == CM_LARGE_PIC)
9470 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9471 rtx label = gen_label_rtx ();
9473 LABEL_PRESERVE_P (label) = 1;
9474 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9475 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9476 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9477 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9478 pic_offset_table_rtx, tmp_reg));
9481 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9484 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9487 /* In the pic_reg_used case, make sure that the got load isn't deleted
9488 when mcount needs it. Blockage to avoid call movement across mcount
9489 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note. */
9491 if (crtl->profile && pic_reg_used)
9492 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9494 if (crtl->drap_reg && !crtl->stack_realign_needed)
9496 /* vDRAP is set up, but after reload it turns out stack realignment
9497 isn't necessary; here we emit the prologue to set up DRAP
9498 without the stack realignment adjustment. */
9500 int drap_bp_offset = UNITS_PER_WORD * 2;
9502 if (ix86_static_chain_on_stack)
9503 drap_bp_offset += UNITS_PER_WORD;
9504 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
9505 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
9508 /* Prevent instructions from being scheduled into register save push
9509 sequence when access to the redzone area is done through frame pointer.
9510 The offset between the frame pointer and the stack pointer is calculated
9511 relative to the value of the stack pointer at the end of the function
9512 prologue, and moving instructions that access redzone area via frame
9513 pointer inside push sequence violates this assumption. */
9514 if (frame_pointer_needed && frame.red_zone_size)
9515 emit_insn (gen_memory_blockage ());
9517 /* Emit cld instruction if stringops are used in the function. */
9518 if (TARGET_CLD && ix86_current_function_needs_cld)
9519 emit_insn (gen_cld ());
9522 /* Emit code to restore REG using a POP insn. */
9525 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
9527 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9529 if (ix86_cfa_state->reg == crtl->drap_reg
9530 && REGNO (reg) == REGNO (crtl->drap_reg))
9532 /* Previously we'd represented the CFA as an expression
9533 like *(%ebp - 8). We've just popped that value from
9534 the stack, which means we need to reset the CFA to
9535 the drap register. This will remain until we restore
9536 the stack pointer. */
9537 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9538 RTX_FRAME_RELATED_P (insn) = 1;
9542 if (ix86_cfa_state->reg == stack_pointer_rtx)
9544 ix86_cfa_state->offset -= UNITS_PER_WORD;
9545 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9546 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9547 RTX_FRAME_RELATED_P (insn) = 1;
9550 /* When the frame pointer is the CFA, and we pop it, we are
9551 swapping back to the stack pointer as the CFA. This happens
9552 for stack frames that don't allocate other data, so we assume
9553 the stack pointer is now pointing at the return address, i.e.
9554 the function entry state, which makes the offset be 1 word. */
9555 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
9556 && reg == hard_frame_pointer_rtx)
9558 ix86_cfa_state->reg = stack_pointer_rtx;
9559 ix86_cfa_state->offset -= UNITS_PER_WORD;
9561 add_reg_note (insn, REG_CFA_DEF_CFA,
9562 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9563 GEN_INT (ix86_cfa_state->offset)));
9564 RTX_FRAME_RELATED_P (insn) = 1;
9567 ix86_add_cfa_restore_note (insn, reg, red_offset);
9570 /* Emit code to restore saved registers using POP insns. */
9573 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
9577 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9578 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9580 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
9582 red_offset += UNITS_PER_WORD;
9586 /* Emit code and notes for the LEAVE instruction. */
9589 ix86_emit_leave (HOST_WIDE_INT red_offset)
9591 rtx insn = emit_insn (ix86_gen_leave ());
9593 ix86_add_queued_cfa_restore_notes (insn);
9595 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
9597 ix86_cfa_state->reg = stack_pointer_rtx;
9598 ix86_cfa_state->offset -= UNITS_PER_WORD;
9600 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9601 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
9602 RTX_FRAME_RELATED_P (insn) = 1;
9603 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
9607 /* Emit code to restore saved registers using MOV insns. First register
9608 is restored from POINTER + OFFSET. */
9610 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9611 HOST_WIDE_INT red_offset,
9612 int maybe_eh_return)
9615 rtx base_address = gen_rtx_MEM (Pmode, pointer);
9618 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9619 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9621 rtx reg = gen_rtx_REG (Pmode, regno);
9623 /* Ensure that adjust_address won't be forced to produce a pointer
9624 outside of the range allowed by the x86-64 instruction set. */
9625 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9629 r11 = gen_rtx_REG (DImode, R11_REG);
9630 emit_move_insn (r11, GEN_INT (offset));
9631 emit_insn (gen_adddi3 (r11, r11, pointer));
9632 base_address = gen_rtx_MEM (Pmode, r11);
9635 insn = emit_move_insn (reg,
9636 adjust_address (base_address, Pmode, offset));
9637 offset += UNITS_PER_WORD;
9639 if (ix86_cfa_state->reg == crtl->drap_reg
9640 && regno == REGNO (crtl->drap_reg))
9642 /* Previously we'd represented the CFA as an expression
9643 like *(%ebp - 8). We've just popped that value from
9644 the stack, which means we need to reset the CFA to
9645 the drap register. This will remain until we restore
9646 the stack pointer. */
9647 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9648 RTX_FRAME_RELATED_P (insn) = 1;
9651 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9653 red_offset += UNITS_PER_WORD;
9657 /* Emit code to restore saved registers using MOV insns. First register
9658 is restored from POINTER + OFFSET. */
9660 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9661 HOST_WIDE_INT red_offset,
9662 int maybe_eh_return)
9665 rtx base_address = gen_rtx_MEM (TImode, pointer);
9668 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9669 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9671 rtx reg = gen_rtx_REG (TImode, regno);
9673 /* Ensure that adjust_address won't be forced to produce a pointer
9674 outside of the range allowed by the x86-64 instruction set. */
9675 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9679 r11 = gen_rtx_REG (DImode, R11_REG);
9680 emit_move_insn (r11, GEN_INT (offset));
9681 emit_insn (gen_adddi3 (r11, r11, pointer));
9682 base_address = gen_rtx_MEM (TImode, r11);
9685 mem = adjust_address (base_address, TImode, offset);
9686 set_mem_align (mem, 128);
9687 emit_move_insn (reg, mem);
9690 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9696 /* Restore function stack, frame, and registers. */
9699 ix86_expand_epilogue (int style)
9702 struct ix86_frame frame;
9703 HOST_WIDE_INT offset, red_offset;
9704 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9707 ix86_finalize_stack_realign_flags ();
9709 /* When stack is realigned, SP must be valid. */
9710 sp_valid = (!frame_pointer_needed
9711 || current_function_sp_is_unchanging
9712 || stack_realign_fp);
9714 ix86_compute_frame_layout (&frame);
9716 /* See the comment about red zone and frame
9717 pointer usage in ix86_expand_prologue. */
9718 if (frame_pointer_needed && frame.red_zone_size)
9719 emit_insn (gen_memory_blockage ());
9721 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9722 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9724 /* Calculate start of saved registers relative to ebp. Special care
9725 must be taken for the normal return case of a function using
9726 eh_return: the eax and edx registers are marked as saved, but not
9727 restored along this path. */
9728 offset = frame.nregs;
9729 if (crtl->calls_eh_return && style != 2)
9731 offset *= -UNITS_PER_WORD;
9732 offset -= frame.nsseregs * 16 + frame.padding0;
9734 /* Calculate start of saved registers relative to esp on entry of the
9735 function. When realigning stack, this needs to be the most negative
9736 value possible at runtime. */
9737 red_offset = offset;
9739 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9741 else if (stack_realign_fp)
9742 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9744 if (ix86_static_chain_on_stack)
9745 red_offset -= UNITS_PER_WORD;
9746 if (frame_pointer_needed)
9747 red_offset -= UNITS_PER_WORD;
9749 /* If we're only restoring one register and sp is not valid, then
9750 use a move instruction to restore the register, since it's
9751 less work than reloading sp and popping the register.
9753 The default code results in a stack adjustment using an add/lea instruction,
9754 while this code results in a LEAVE instruction (or discrete equivalent),
9755 so it is profitable in some other cases as well. Especially when there
9756 are no registers to restore. We also use this code when TARGET_USE_LEAVE
9757 and there is exactly one register to pop. This heuristic may need some
9758 tuning in the future. */
9759 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9760 || (TARGET_EPILOGUE_USING_MOVE
9761 && cfun->machine->use_fast_prologue_epilogue
9762 && ((frame.nregs + frame.nsseregs) > 1
9763 || (frame.to_allocate + frame.padding0) != 0))
9764 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9765 && (frame.to_allocate + frame.padding0) != 0)
9766 || (frame_pointer_needed && TARGET_USE_LEAVE
9767 && cfun->machine->use_fast_prologue_epilogue
9768 && (frame.nregs + frame.nsseregs) == 1)
9769 || crtl->calls_eh_return)
9771 /* Restore registers. We can use ebp or esp to address the memory
9772 locations. If both are available, default to ebp, since offsets
9773 are known to be small. The only exception is esp pointing directly
9774 to the end of the block of saved registers, where we may simplify the addressing mode.
9777 If we are realigning the stack with bp and sp, register restores can't
9778 be addressed by bp; sp must be used instead. */
9780 if (!frame_pointer_needed
9781 || (sp_valid && !(frame.to_allocate + frame.padding0))
9782 || stack_realign_fp)
9784 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9785 frame.to_allocate, red_offset,
9787 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9789 + frame.nsseregs * 16
9792 + frame.nsseregs * 16
9793 + frame.padding0, style == 2);
9797 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9800 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9802 + frame.nsseregs * 16
9805 + frame.nsseregs * 16
9806 + frame.padding0, style == 2);
9809 red_offset -= offset;
9811 /* eh_return epilogues need %ecx added to the stack pointer. */
9814 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9816 /* Stack align doesn't work with eh_return. */
9817 gcc_assert (!crtl->stack_realign_needed);
9818 /* Neither do regparm nested functions. */
9819 gcc_assert (!ix86_static_chain_on_stack);
9821 if (frame_pointer_needed)
9823 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9824 tmp = plus_constant (tmp, UNITS_PER_WORD);
9825 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9827 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9828 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9830 /* Note that we use SA as a temporary CFA, as the return
9831 address is at the proper place relative to it. We
9832 pretend this happens at the FP restore insn because
9833 prior to this insn the FP would be stored at the wrong
9834 offset relative to SA, and after this insn we have no
9835 other reasonable register to use for the CFA. We don't
9836 bother resetting the CFA to the SP for the duration of the return insn. */
9838 add_reg_note (tmp, REG_CFA_DEF_CFA,
9839 plus_constant (sa, UNITS_PER_WORD));
9840 ix86_add_queued_cfa_restore_notes (tmp);
9841 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9842 RTX_FRAME_RELATED_P (tmp) = 1;
9843 ix86_cfa_state->reg = sa;
9844 ix86_cfa_state->offset = UNITS_PER_WORD;
9846 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9847 const0_rtx, style, false);
9851 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9852 tmp = plus_constant (tmp, (frame.to_allocate
9853 + frame.nregs * UNITS_PER_WORD
9854 + frame.nsseregs * 16
9856 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9857 ix86_add_queued_cfa_restore_notes (tmp);
9859 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9860 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9862 ix86_cfa_state->offset = UNITS_PER_WORD;
9863 add_reg_note (tmp, REG_CFA_DEF_CFA,
9864 plus_constant (stack_pointer_rtx,
9866 RTX_FRAME_RELATED_P (tmp) = 1;
9870 else if (!frame_pointer_needed)
9871 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9872 GEN_INT (frame.to_allocate
9873 + frame.nregs * UNITS_PER_WORD
9874 + frame.nsseregs * 16
9876 style, !using_drap);
9877 /* If not an i386, mov & pop is faster than "leave". */
9878 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9879 || !cfun->machine->use_fast_prologue_epilogue)
9880 ix86_emit_leave (red_offset);
9883 pro_epilogue_adjust_stack (stack_pointer_rtx,
9884 hard_frame_pointer_rtx,
9885 const0_rtx, style, !using_drap);
9887 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9892 /* The first step is to deallocate the stack frame so that we can pop the registers.
9895 If we realign the stack with the frame pointer, then the stack pointer
9896 can't be recovered via lea $offset(%bp), %sp, because
9897 there is a padding area between bp and sp for the realignment.
9898 "add $to_allocate, %sp" must be used instead. */
9901 gcc_assert (frame_pointer_needed);
9902 gcc_assert (!stack_realign_fp);
9903 pro_epilogue_adjust_stack (stack_pointer_rtx,
9904 hard_frame_pointer_rtx,
9905 GEN_INT (offset), style, false);
9906 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9909 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9910 GEN_INT (frame.nsseregs * 16
9914 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9916 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9917 frame.to_allocate, red_offset,
9919 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9920 GEN_INT (frame.to_allocate
9921 + frame.nsseregs * 16
9922 + frame.padding0), style,
9923 !using_drap && !frame_pointer_needed);
9926 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9928 red_offset -= offset;
9930 if (frame_pointer_needed)
9932 /* Leave results in shorter dependency chains on CPUs that are
9933 able to grok it fast. */
9934 if (TARGET_USE_LEAVE)
9935 ix86_emit_leave (red_offset);
9938 /* If stack realignment really happened, recovering the stack
9939 pointer from the hard frame pointer is a must, if not using leave. */
9941 if (stack_realign_fp)
9942 pro_epilogue_adjust_stack (stack_pointer_rtx,
9943 hard_frame_pointer_rtx,
9944 const0_rtx, style, !using_drap);
9945 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9953 int param_ptr_offset = UNITS_PER_WORD;
9956 gcc_assert (stack_realign_drap);
9958 if (ix86_static_chain_on_stack)
9959 param_ptr_offset += UNITS_PER_WORD;
9960 if (!call_used_regs[REGNO (crtl->drap_reg)])
9961 param_ptr_offset += UNITS_PER_WORD;
9963 insn = emit_insn (gen_rtx_SET
9964 (VOIDmode, stack_pointer_rtx,
9965 gen_rtx_PLUS (Pmode,
9967 GEN_INT (-param_ptr_offset))));
9968 ix86_cfa_state->reg = stack_pointer_rtx;
9969 ix86_cfa_state->offset = param_ptr_offset;
9971 add_reg_note (insn, REG_CFA_DEF_CFA,
9972 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9973 GEN_INT (ix86_cfa_state->offset)));
9974 RTX_FRAME_RELATED_P (insn) = 1;
9976 if (!call_used_regs[REGNO (crtl->drap_reg)])
9977 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9980 /* Remove the saved static chain from the stack. The use of ECX is
9981 merely as a scratch register, not as the actual static chain. */
9982 if (ix86_static_chain_on_stack)
9986 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9987 ix86_cfa_state->offset += UNITS_PER_WORD;
9989 r = gen_rtx_REG (Pmode, CX_REG);
9990 insn = emit_insn (ix86_gen_pop1 (r));
9992 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9993 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9994 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9995 RTX_FRAME_RELATED_P (insn) = 1;
9998 /* Sibcall epilogues don't want a return instruction. */
10001 *ix86_cfa_state = cfa_state_save;
10005 if (crtl->args.pops_args && crtl->args.size)
10007 rtx popc = GEN_INT (crtl->args.pops_args);
10009 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10010 address, do explicit add, and jump indirectly to the caller. */
10012 if (crtl->args.pops_args >= 65536)
10014 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10017 /* There is no "pascal" calling convention in any 64bit ABI. */
10018 gcc_assert (!TARGET_64BIT);
10020 insn = emit_insn (gen_popsi1 (ecx));
10021 ix86_cfa_state->offset -= UNITS_PER_WORD;
10023 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10024 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10025 add_reg_note (insn, REG_CFA_REGISTER,
10026 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10027 RTX_FRAME_RELATED_P (insn) = 1;
10029 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10031 emit_jump_insn (gen_return_indirect_internal (ecx));
10034 emit_jump_insn (gen_return_pop_internal (popc));
10037 emit_jump_insn (gen_return_internal ());
10039 /* Restore the state back to the state from the prologue,
10040 so that it's correct for the next epilogue. */
10041 *ix86_cfa_state = cfa_state_save;
10044 /* Reset from the function's potential modifications. */
10047 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10048 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10050 if (pic_offset_table_rtx)
10051 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10053 /* Mach-O doesn't support labels at the end of objects, so if
10054 it looks like we might want one, insert a NOP. */
10056 rtx insn = get_last_insn ();
10059 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10060 insn = PREV_INSN (insn);
10064 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10065 fputs ("\tnop\n", file);
10071 /* Extract the parts of an RTL expression that is a valid memory address
10072 for an instruction. Return 0 if the structure of the address is
10073 grossly off. Return -1 if the address contains ASHIFT, so it is not
10074    strictly valid, but is still used for computing the length of the lea
   instruction. */
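/* For illustration (schematic RTL, not verbatim), typical decompositions
   are:

     (reg %ebx)				    -> base = %ebx
     (plus (reg %ebx) (const_int 8))	    -> base = %ebx, disp = 8
     (plus (mult (reg %eax) (const_int 4))
	   (plus (reg %ebx) (const_int 8))) -> base, index, scale = 4, disp

   mirroring the x86 effective-address form disp(%base,%index,scale).  */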
10077 ix86_decompose_address (rtx addr, struct ix86_address *out)
10079 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10080 rtx base_reg, index_reg;
10081 HOST_WIDE_INT scale = 1;
10082 rtx scale_rtx = NULL_RTX;
10085 enum ix86_address_seg seg = SEG_DEFAULT;
10087 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10089 else if (GET_CODE (addr) == PLUS)
10091 rtx addends[4], op;
10099 addends[n++] = XEXP (op, 1);
10102 while (GET_CODE (op) == PLUS);
10107 for (i = n; i >= 0; --i)
10110 switch (GET_CODE (op))
10115 index = XEXP (op, 0);
10116 scale_rtx = XEXP (op, 1);
10122 index = XEXP (op, 0);
10123 tmp = XEXP (op, 1);
10124 if (!CONST_INT_P (tmp))
10126 scale = INTVAL (tmp);
10127 if ((unsigned HOST_WIDE_INT) scale > 3)
10129 scale = 1 << scale;
10133 if (XINT (op, 1) == UNSPEC_TP
10134 && TARGET_TLS_DIRECT_SEG_REFS
10135 && seg == SEG_DEFAULT)
10136 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10165 else if (GET_CODE (addr) == MULT)
10167 index = XEXP (addr, 0); /* index*scale */
10168 scale_rtx = XEXP (addr, 1);
10170 else if (GET_CODE (addr) == ASHIFT)
10172 /* We're called for lea too, which implements ashift on occasion. */
10173 index = XEXP (addr, 0);
10174 tmp = XEXP (addr, 1);
10175 if (!CONST_INT_P (tmp))
10177 scale = INTVAL (tmp);
10178 if ((unsigned HOST_WIDE_INT) scale > 3)
10180 scale = 1 << scale;
10184 disp = addr; /* displacement */
10186 /* Extract the integral value of scale. */
10189 if (!CONST_INT_P (scale_rtx))
10191 scale = INTVAL (scale_rtx);
10194 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10195 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10197 /* Avoid useless 0 displacement. */
10198 if (disp == const0_rtx && (base || index))
10201   /* Allow arg pointer and stack pointer as index if there is no scaling. */
10202 if (base_reg && index_reg && scale == 1
10203 && (index_reg == arg_pointer_rtx
10204 || index_reg == frame_pointer_rtx
10205 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10208 tmp = base, base = index, index = tmp;
10209 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10212 /* Special case: %ebp cannot be encoded as a base without a displacement.
10216 && (base_reg == hard_frame_pointer_rtx
10217 || base_reg == frame_pointer_rtx
10218 || base_reg == arg_pointer_rtx
10219 || (REG_P (base_reg)
10220 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10221 || REGNO (base_reg) == R13_REG))))
10224 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10225 Avoid this by transforming to [%esi+0].
10226 Reload calls address legitimization without cfun defined, so we need
10227 to test cfun for being non-NULL. */
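  /* E.g. "movl (%esi), %eax" is in effect rewritten as
     "movl 0(%esi), %eax", trading one displacement byte for a short
     decode on K6 (an illustrative sketch of the transformation).  */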
10228 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10229 && base_reg && !index_reg && !disp
10230 && REG_P (base_reg)
10231 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
10234 /* Special case: encode reg+reg instead of reg*2. */
10235 if (!base && index && scale == 2)
10236 base = index, base_reg = index_reg, scale = 1;
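  /* I.e. prefer "leal (%eax,%eax), %edx" over "leal (,%eax,2), %edx";
     an index without a base must be encoded with a 4-byte zero
     displacement (illustrative example).  */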
10238 /* Special case: scaling cannot be encoded without base or displacement. */
10239 if (!base && !disp && index && scale != 1)
10243 out->index = index;
10245 out->scale = scale;
10251 /* Return cost of the memory address x.
10252 For i386, it is better to use a complex address than let gcc copy
10253 the address into a reg and make a new pseudo. But not if the address
10254    requires two regs - that would mean more pseudos with longer
10257 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10259 struct ix86_address parts;
10261 int ok = ix86_decompose_address (x, &parts);
10265 if (parts.base && GET_CODE (parts.base) == SUBREG)
10266 parts.base = SUBREG_REG (parts.base);
10267 if (parts.index && GET_CODE (parts.index) == SUBREG)
10268 parts.index = SUBREG_REG (parts.index);
10270 /* Attempt to minimize number of registers in the address. */
10272 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10274 && (!REG_P (parts.index)
10275 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10279 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10281 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10282 && parts.base != parts.index)
10285   /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10286      since its predecode logic can't detect the length of instructions
10287      and decoding degenerates to vector decoding.  Increase the cost of such
10288      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
10289      to split such addresses or even refuse such addresses at all.
10291      The following addressing modes are affected:
10292       [base+scale*index]
10293       [scale*index+disp]
10294       [base+index]
10296      The first and last cases may be avoidable by explicitly coding the zero in
10297      the memory address, but I don't have an AMD K6 machine handy to check this
10301 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10302 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10303 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10309 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10310    this is used to form addresses to local data when -fPIC is in
10314 darwin_local_data_pic (rtx disp)
10316 return (GET_CODE (disp) == UNSPEC
10317 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10320 /* Determine if a given RTX is a valid constant. We already know this
10321 satisfies CONSTANT_P. */
10324 legitimate_constant_p (rtx x)
10326 switch (GET_CODE (x))
10331 if (GET_CODE (x) == PLUS)
10333 if (!CONST_INT_P (XEXP (x, 1)))
10338 if (TARGET_MACHO && darwin_local_data_pic (x))
10341 /* Only some unspecs are valid as "constants". */
10342 if (GET_CODE (x) == UNSPEC)
10343 switch (XINT (x, 1))
10346 case UNSPEC_GOTOFF:
10347 case UNSPEC_PLTOFF:
10348 return TARGET_64BIT;
10350 case UNSPEC_NTPOFF:
10351 x = XVECEXP (x, 0, 0);
10352 return (GET_CODE (x) == SYMBOL_REF
10353 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10354 case UNSPEC_DTPOFF:
10355 x = XVECEXP (x, 0, 0);
10356 return (GET_CODE (x) == SYMBOL_REF
10357 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10362 /* We must have drilled down to a symbol. */
10363 if (GET_CODE (x) == LABEL_REF)
10365 if (GET_CODE (x) != SYMBOL_REF)
10370 /* TLS symbols are never valid. */
10371 if (SYMBOL_REF_TLS_MODEL (x))
10374 /* DLLIMPORT symbols are never valid. */
10375 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10376 && SYMBOL_REF_DLLIMPORT_P (x))
10381 if (GET_MODE (x) == TImode
10382 && x != CONST0_RTX (TImode)
10388 if (!standard_sse_constant_p (x))
10395 /* Otherwise we handle everything else in the move patterns. */
10399 /* Determine if it's legal to put X into the constant pool. This
10400 is not possible for the address of thread-local symbols, which
10401 is checked above. */
10404 ix86_cannot_force_const_mem (rtx x)
10406 /* We can always put integral constants and vectors in memory. */
10407 switch (GET_CODE (x))
10417 return !legitimate_constant_p (x);
10421 /* Nonzero if the constant value X is a legitimate general operand
10422 when generating PIC code. It is given that flag_pic is on and
10423 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10426 legitimate_pic_operand_p (rtx x)
10430 switch (GET_CODE (x))
10433 inner = XEXP (x, 0);
10434 if (GET_CODE (inner) == PLUS
10435 && CONST_INT_P (XEXP (inner, 1)))
10436 inner = XEXP (inner, 0);
10438 /* Only some unspecs are valid as "constants". */
10439 if (GET_CODE (inner) == UNSPEC)
10440 switch (XINT (inner, 1))
10443 case UNSPEC_GOTOFF:
10444 case UNSPEC_PLTOFF:
10445 return TARGET_64BIT;
10447 x = XVECEXP (inner, 0, 0);
10448 return (GET_CODE (x) == SYMBOL_REF
10449 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10450 case UNSPEC_MACHOPIC_OFFSET:
10451 return legitimate_pic_address_disp_p (x);
10459 return legitimate_pic_address_disp_p (x);
10466 /* Determine if a given CONST RTX is a valid memory displacement
10470 legitimate_pic_address_disp_p (rtx disp)
10474 /* In 64bit mode we can allow direct addresses of symbols and labels
10475 when they are not dynamic symbols. */
10478 rtx op0 = disp, op1;
10480 switch (GET_CODE (disp))
10486 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10488 op0 = XEXP (XEXP (disp, 0), 0);
10489 op1 = XEXP (XEXP (disp, 0), 1);
10490 if (!CONST_INT_P (op1)
10491 || INTVAL (op1) >= 16*1024*1024
10492 || INTVAL (op1) < -16*1024*1024)
10494 if (GET_CODE (op0) == LABEL_REF)
10496 if (GET_CODE (op0) != SYMBOL_REF)
10501 /* TLS references should always be enclosed in UNSPEC. */
10502 if (SYMBOL_REF_TLS_MODEL (op0))
10504 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10505 && ix86_cmodel != CM_LARGE_PIC)
10513 if (GET_CODE (disp) != CONST)
10515 disp = XEXP (disp, 0);
10519   /* It is unsafe to allow PLUS expressions.  This limits the allowed
10520      distance of GOT table references.  We should not need these anyway. */
10521 if (GET_CODE (disp) != UNSPEC
10522 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10523 && XINT (disp, 1) != UNSPEC_GOTOFF
10524 && XINT (disp, 1) != UNSPEC_PLTOFF))
10527 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10528 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10534 if (GET_CODE (disp) == PLUS)
10536 if (!CONST_INT_P (XEXP (disp, 1)))
10538 disp = XEXP (disp, 0);
10542 if (TARGET_MACHO && darwin_local_data_pic (disp))
10545 if (GET_CODE (disp) != UNSPEC)
10548 switch (XINT (disp, 1))
10553 /* We need to check for both symbols and labels because VxWorks loads
10554 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10556 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10557 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10558 case UNSPEC_GOTOFF:
10559 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10560 	 While the ABI also specifies a 32bit relocation, we don't produce
10561 	 it in the small PIC model at all. */
10562 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10563 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10565 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10567 case UNSPEC_GOTTPOFF:
10568 case UNSPEC_GOTNTPOFF:
10569 case UNSPEC_INDNTPOFF:
10572 disp = XVECEXP (disp, 0, 0);
10573 return (GET_CODE (disp) == SYMBOL_REF
10574 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10575 case UNSPEC_NTPOFF:
10576 disp = XVECEXP (disp, 0, 0);
10577 return (GET_CODE (disp) == SYMBOL_REF
10578 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10579 case UNSPEC_DTPOFF:
10580 disp = XVECEXP (disp, 0, 0);
10581 return (GET_CODE (disp) == SYMBOL_REF
10582 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10588 /* Recognizes RTL expressions that are valid memory addresses for an
10589 instruction. The MODE argument is the machine mode for the MEM
10590 expression that wants to use this address.
10592    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
10593 convert common non-canonical forms to canonical form so that they will
10597 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10598 rtx addr, bool strict)
10600 struct ix86_address parts;
10601 rtx base, index, disp;
10602 HOST_WIDE_INT scale;
10604 if (ix86_decompose_address (addr, &parts) <= 0)
10605 /* Decomposition failed. */
10609 index = parts.index;
10611 scale = parts.scale;
10613 /* Validate base register.
10615      Don't allow SUBREGs that span more than a word here.  They can lead to spill
10616      failures when the base is one word out of a two word structure, which is
10617 represented internally as a DImode int. */
10625 else if (GET_CODE (base) == SUBREG
10626 && REG_P (SUBREG_REG (base))
10627 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10629 reg = SUBREG_REG (base);
10631 /* Base is not a register. */
10634 if (GET_MODE (base) != Pmode)
10635 /* Base is not in Pmode. */
10638 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10639 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10640 /* Base is not valid. */
10644 /* Validate index register.
10646      Don't allow SUBREGs that span more than a word here -- same as above. */
10654 else if (GET_CODE (index) == SUBREG
10655 && REG_P (SUBREG_REG (index))
10656 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10658 reg = SUBREG_REG (index);
10660 /* Index is not a register. */
10663 if (GET_MODE (index) != Pmode)
10664 /* Index is not in Pmode. */
10667 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10668 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10669 /* Index is not valid. */
10673 /* Validate scale factor. */
10677 /* Scale without index. */
10680 if (scale != 2 && scale != 4 && scale != 8)
10681 /* Scale is not a valid multiplier. */
10685 /* Validate displacement. */
10688 if (GET_CODE (disp) == CONST
10689 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10690 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10691 switch (XINT (XEXP (disp, 0), 1))
10693 	/* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
10694 	   used.  While the ABI also specifies 32bit relocations, we don't produce
10695 	   them at all and use IP-relative addressing instead. */
10697 case UNSPEC_GOTOFF:
10698 gcc_assert (flag_pic);
10700 goto is_legitimate_pic;
10702 /* 64bit address unspec. */
10705 case UNSPEC_GOTPCREL:
10706 gcc_assert (flag_pic);
10707 goto is_legitimate_pic;
10709 case UNSPEC_GOTTPOFF:
10710 case UNSPEC_GOTNTPOFF:
10711 case UNSPEC_INDNTPOFF:
10712 case UNSPEC_NTPOFF:
10713 case UNSPEC_DTPOFF:
10717 /* Invalid address unspec. */
10721 else if (SYMBOLIC_CONST (disp)
10725 && MACHOPIC_INDIRECT
10726 && !machopic_operand_p (disp)
10732 if (TARGET_64BIT && (index || base))
10734 /* foo@dtpoff(%rX) is ok. */
10735 if (GET_CODE (disp) != CONST
10736 || GET_CODE (XEXP (disp, 0)) != PLUS
10737 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10738 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10739 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10740 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10741 /* Non-constant pic memory reference. */
10744 else if (! legitimate_pic_address_disp_p (disp))
10745 /* Displacement is an invalid pic construct. */
10748 /* This code used to verify that a symbolic pic displacement
10749 includes the pic_offset_table_rtx register.
10751 	 While this is a good idea, unfortunately these constructs may
10752 	 be created by the "adds using lea" optimization for incorrect
10761 	 This code is nonsensical, but results in addressing the
10762 	 GOT table with a pic_offset_table_rtx base.  We can't
10763 	 just refuse it easily, since it gets matched by the
10764 	 "addsi3" pattern, which later gets split to lea in the
10765 	 case the output register differs from the input.  While this
10766 	 could be handled by a separate addsi pattern for this case
10767 	 that never results in lea, disabling this test seems to be
10768 	 the easier and correct fix for the crash. */
10770 else if (GET_CODE (disp) != LABEL_REF
10771 && !CONST_INT_P (disp)
10772 && (GET_CODE (disp) != CONST
10773 || !legitimate_constant_p (disp))
10774 && (GET_CODE (disp) != SYMBOL_REF
10775 || !legitimate_constant_p (disp)))
10776 /* Displacement is not constant. */
10778 else if (TARGET_64BIT
10779 && !x86_64_immediate_operand (disp, VOIDmode))
10780 /* Displacement is out of range. */
10784 /* Everything looks valid. */
10788 /* Determine if a given RTX is a valid constant address. */
10791 constant_address_p (rtx x)
10793 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10796 /* Return a unique alias set for the GOT. */
10798 static alias_set_type
10799 ix86_GOT_alias_set (void)
10801 static alias_set_type set = -1;
10803 set = new_alias_set ();
10807 /* Return a legitimate reference for ORIG (an address) using the
10808 register REG. If REG is 0, a new pseudo is generated.
10810 There are two types of references that must be handled:
10812 1. Global data references must load the address from the GOT, via
10813 the PIC reg. An insn is emitted to do this load, and the reg is
10816 2. Static data references, constant pool addresses, and code labels
10817 compute the address as an offset from the GOT, whose base is in
10818 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10819 differentiate them from global data objects. The returned
10820 address is the PIC reg + an unspec constant.
10822 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10823 reg also appears in the address. */
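/* Roughly, for 32-bit ELF the two cases correspond to (a sketch, not
   the literal insns emitted below):

     movl  foo@GOT(%ebx), %reg	   # case 1: load the address from the GOT
     leal  foo@GOTOFF(%ebx), %reg  # case 2: offset from the GOT base  */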
10826 legitimize_pic_address (rtx orig, rtx reg)
10829 rtx new_rtx = orig;
10833 if (TARGET_MACHO && !TARGET_64BIT)
10836 reg = gen_reg_rtx (Pmode);
10837 /* Use the generic Mach-O PIC machinery. */
10838 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10842 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10844 else if (TARGET_64BIT
10845 && ix86_cmodel != CM_SMALL_PIC
10846 && gotoff_operand (addr, Pmode))
10849 /* This symbol may be referenced via a displacement from the PIC
10850 base address (@GOTOFF). */
10852 if (reload_in_progress)
10853 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10854 if (GET_CODE (addr) == CONST)
10855 addr = XEXP (addr, 0);
10856 if (GET_CODE (addr) == PLUS)
10858 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10860 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10863 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10864 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10866 tmpreg = gen_reg_rtx (Pmode);
10869 emit_move_insn (tmpreg, new_rtx);
10873 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10874 tmpreg, 1, OPTAB_DIRECT);
10877 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10879 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10881 /* This symbol may be referenced via a displacement from the PIC
10882 base address (@GOTOFF). */
10884 if (reload_in_progress)
10885 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10886 if (GET_CODE (addr) == CONST)
10887 addr = XEXP (addr, 0);
10888 if (GET_CODE (addr) == PLUS)
10890 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10892 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10895 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10896 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10897 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10901 emit_move_insn (reg, new_rtx);
10905 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10906 /* We can't use @GOTOFF for text labels on VxWorks;
10907 see gotoff_operand. */
10908 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10910 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10912 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10913 return legitimize_dllimport_symbol (addr, true);
10914 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10915 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10916 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10918 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10919 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10923 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10925 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10927 new_rtx = gen_const_mem (Pmode, new_rtx);
10928 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10931 reg = gen_reg_rtx (Pmode);
10932 	  /* Use gen_movsi directly, otherwise the address is loaded
10933 	     into a register for CSE.  We don't want to CSE these addresses;
10934 	     instead we CSE addresses from the GOT table, so skip this. */
10935 emit_insn (gen_movsi (reg, new_rtx));
10940 /* This symbol must be referenced via a load from the
10941 Global Offset Table (@GOT). */
10943 if (reload_in_progress)
10944 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10945 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10946 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10948 new_rtx = force_reg (Pmode, new_rtx);
10949 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10950 new_rtx = gen_const_mem (Pmode, new_rtx);
10951 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10954 reg = gen_reg_rtx (Pmode);
10955 emit_move_insn (reg, new_rtx);
10961 if (CONST_INT_P (addr)
10962 && !x86_64_immediate_operand (addr, VOIDmode))
10966 emit_move_insn (reg, addr);
10970 new_rtx = force_reg (Pmode, addr);
10972 else if (GET_CODE (addr) == CONST)
10974 addr = XEXP (addr, 0);
10976 /* We must match stuff we generate before. Assume the only
10977 unspecs that can get here are ours. Not that we could do
10978 anything with them anyway.... */
10979 if (GET_CODE (addr) == UNSPEC
10980 || (GET_CODE (addr) == PLUS
10981 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10983 gcc_assert (GET_CODE (addr) == PLUS);
10985 if (GET_CODE (addr) == PLUS)
10987 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10989 /* Check first to see if this is a constant offset from a @GOTOFF
10990 symbol reference. */
10991 if (gotoff_operand (op0, Pmode)
10992 && CONST_INT_P (op1))
10996 if (reload_in_progress)
10997 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10998 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11000 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11001 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11002 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11006 emit_move_insn (reg, new_rtx);
11012 if (INTVAL (op1) < -16*1024*1024
11013 || INTVAL (op1) >= 16*1024*1024)
11015 if (!x86_64_immediate_operand (op1, Pmode))
11016 op1 = force_reg (Pmode, op1);
11017 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11023 base = legitimize_pic_address (XEXP (addr, 0), reg);
11024 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11025 base == reg ? NULL_RTX : reg);
11027 if (CONST_INT_P (new_rtx))
11028 new_rtx = plus_constant (base, INTVAL (new_rtx));
11031 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11033 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11034 new_rtx = XEXP (new_rtx, 1);
11036 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11044 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11047 get_thread_pointer (int to_reg)
11051 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11055 reg = gen_reg_rtx (Pmode);
11056 insn = gen_rtx_SET (VOIDmode, reg, tp);
11057 insn = emit_insn (insn);
11062 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11063 false if we expect this to be used for a memory address and true if
11064 we expect to load the address into a register. */
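/* For orientation, the classic IA-32 ELF sequences look roughly like
   (a sketch; the cases below show what is actually built):

     global dynamic:  leal x@tlsgd(,%ebx,1), %eax
		      call ___tls_get_addr
     local exec:      movl %gs:0, %reg
		      addl $x@ntpoff, %reg  */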
11067 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11069 rtx dest, base, off, pic, tp;
11074 case TLS_MODEL_GLOBAL_DYNAMIC:
11075 dest = gen_reg_rtx (Pmode);
11076 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11078 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11080 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11083 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11084 insns = get_insns ();
11087 RTL_CONST_CALL_P (insns) = 1;
11088 emit_libcall_block (insns, dest, rax, x);
11090 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11091 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11093 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11095 if (TARGET_GNU2_TLS)
11097 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11099 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11103 case TLS_MODEL_LOCAL_DYNAMIC:
11104 base = gen_reg_rtx (Pmode);
11105 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11107 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11109 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11112 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11113 insns = get_insns ();
11116 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11117 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11118 RTL_CONST_CALL_P (insns) = 1;
11119 emit_libcall_block (insns, base, rax, note);
11121 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11122 emit_insn (gen_tls_local_dynamic_base_64 (base));
11124 emit_insn (gen_tls_local_dynamic_base_32 (base));
11126 if (TARGET_GNU2_TLS)
11128 rtx x = ix86_tls_module_base ();
11130 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11131 gen_rtx_MINUS (Pmode, x, tp));
11134 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11135 off = gen_rtx_CONST (Pmode, off);
11137 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11139 if (TARGET_GNU2_TLS)
11141 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11143 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11148 case TLS_MODEL_INITIAL_EXEC:
11152 type = UNSPEC_GOTNTPOFF;
11156 if (reload_in_progress)
11157 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11158 pic = pic_offset_table_rtx;
11159 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11161 else if (!TARGET_ANY_GNU_TLS)
11163 pic = gen_reg_rtx (Pmode);
11164 emit_insn (gen_set_got (pic));
11165 type = UNSPEC_GOTTPOFF;
11170 type = UNSPEC_INDNTPOFF;
11173 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11174 off = gen_rtx_CONST (Pmode, off);
11176 off = gen_rtx_PLUS (Pmode, pic, off);
11177 off = gen_const_mem (Pmode, off);
11178 set_mem_alias_set (off, ix86_GOT_alias_set ());
11180 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11182 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11183 off = force_reg (Pmode, off);
11184 return gen_rtx_PLUS (Pmode, base, off);
11188 base = get_thread_pointer (true);
11189 dest = gen_reg_rtx (Pmode);
11190 emit_insn (gen_subsi3 (dest, base, off));
11194 case TLS_MODEL_LOCAL_EXEC:
11195 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11196 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11197 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11198 off = gen_rtx_CONST (Pmode, off);
11200 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11202 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11203 return gen_rtx_PLUS (Pmode, base, off);
11207 base = get_thread_pointer (true);
11208 dest = gen_reg_rtx (Pmode);
11209 emit_insn (gen_subsi3 (dest, base, off));
11214 gcc_unreachable ();
11220 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11223 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11224 htab_t dllimport_map;
11227 get_dllimport_decl (tree decl)
11229 struct tree_map *h, in;
11232 const char *prefix;
11233 size_t namelen, prefixlen;
11238 if (!dllimport_map)
11239 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11241 in.hash = htab_hash_pointer (decl);
11242 in.base.from = decl;
11243 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11244 h = (struct tree_map *) *loc;
11248 *loc = h = ggc_alloc_tree_map ();
11250 h->base.from = decl;
11251 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11252 VAR_DECL, NULL, ptr_type_node);
11253 DECL_ARTIFICIAL (to) = 1;
11254 DECL_IGNORED_P (to) = 1;
11255 DECL_EXTERNAL (to) = 1;
11256 TREE_READONLY (to) = 1;
11258 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11259 name = targetm.strip_name_encoding (name);
11260 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11261 ? "*__imp_" : "*__imp__";
11262 namelen = strlen (name);
11263 prefixlen = strlen (prefix);
11264 imp_name = (char *) alloca (namelen + prefixlen + 1);
11265 memcpy (imp_name, prefix, prefixlen);
11266 memcpy (imp_name + prefixlen, name, namelen + 1);
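      /* E.g. (illustrative): a decl "bar" with user_label_prefix "_"
	 yields "*__imp__bar", while a fastcall decl "@foo@8" yields
	 "*__imp_@foo@8".  */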
11268 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11269 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11270 SET_SYMBOL_REF_DECL (rtl, to);
11271 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11273 rtl = gen_const_mem (Pmode, rtl);
11274 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11276 SET_DECL_RTL (to, rtl);
11277 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11282 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11283 true if we require the result be a register. */
11286 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11291 gcc_assert (SYMBOL_REF_DECL (symbol));
11292 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11294 x = DECL_RTL (imp_decl);
11296 x = force_reg (Pmode, x);
11300 /* Try machine-dependent ways of modifying an illegitimate address
11301 to be legitimate. If we find one, return the new, valid address.
11302 This macro is used in only one place: `memory_address' in explow.c.
11304 OLDX is the address as it was before break_out_memory_refs was called.
11305 In some cases it is useful to look at this to decide what needs to be done.
11307 It is always safe for this macro to do nothing. It exists to recognize
11308 opportunities to optimize the output.
11310 For the 80386, we handle X+REG by loading X into a register R and
11311 using R+REG. R will go in a general reg and indexing will be used.
11312 However, if REG is a broken-out memory address or multiplication,
11313 nothing needs to be done because REG can certainly go in a general reg.
11315 When -fpic is used, special handling is needed for symbolic references.
11316 See comments by legitimize_pic_address in i386.c for details. */
11319 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11320 enum machine_mode mode)
11325 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11327 return legitimize_tls_address (x, (enum tls_model) log, false);
11328 if (GET_CODE (x) == CONST
11329 && GET_CODE (XEXP (x, 0)) == PLUS
11330 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11331 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11333 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11334 (enum tls_model) log, false);
11335 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11338 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11340 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11341 return legitimize_dllimport_symbol (x, true);
11342 if (GET_CODE (x) == CONST
11343 && GET_CODE (XEXP (x, 0)) == PLUS
11344 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11345 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11347 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11348 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11352 if (flag_pic && SYMBOLIC_CONST (x))
11353 return legitimize_pic_address (x, 0);
11355 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11356 if (GET_CODE (x) == ASHIFT
11357 && CONST_INT_P (XEXP (x, 1))
11358 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11361 log = INTVAL (XEXP (x, 1));
11362 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11363 GEN_INT (1 << log));
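      /* E.g. (ashift (reg) (const_int 2)) becomes
	 (mult (reg) (const_int 4)), the canonical form for address
	 scaling (illustrative).  */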
11366 if (GET_CODE (x) == PLUS)
11368 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11370 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11371 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11372 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11375 log = INTVAL (XEXP (XEXP (x, 0), 1));
11376 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11377 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11378 GEN_INT (1 << log));
11381 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11382 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11383 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11386 log = INTVAL (XEXP (XEXP (x, 1), 1));
11387 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11388 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11389 GEN_INT (1 << log));
11392 /* Put multiply first if it isn't already. */
11393 if (GET_CODE (XEXP (x, 1)) == MULT)
11395 rtx tmp = XEXP (x, 0);
11396 XEXP (x, 0) = XEXP (x, 1);
11401 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11402 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11403 created by virtual register instantiation, register elimination, and
11404 similar optimizations. */
11405 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11408 x = gen_rtx_PLUS (Pmode,
11409 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11410 XEXP (XEXP (x, 1), 0)),
11411 XEXP (XEXP (x, 1), 1));
11415 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11416 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11417 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11418 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11419 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11420 && CONSTANT_P (XEXP (x, 1)))
11423 rtx other = NULL_RTX;
11425 if (CONST_INT_P (XEXP (x, 1)))
11427 constant = XEXP (x, 1);
11428 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11430 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11432 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11433 other = XEXP (x, 1);
11441 x = gen_rtx_PLUS (Pmode,
11442 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11443 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11444 plus_constant (other, INTVAL (constant)));
11448 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11451 if (GET_CODE (XEXP (x, 0)) == MULT)
11454 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11457 if (GET_CODE (XEXP (x, 1)) == MULT)
11460 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11464 && REG_P (XEXP (x, 1))
11465 && REG_P (XEXP (x, 0)))
11468 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11471 x = legitimize_pic_address (x, 0);
11474 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11477 if (REG_P (XEXP (x, 0)))
11479 rtx temp = gen_reg_rtx (Pmode);
11480 rtx val = force_operand (XEXP (x, 1), temp);
11482 emit_move_insn (temp, val);
11484 XEXP (x, 1) = temp;
11488 else if (REG_P (XEXP (x, 1)))
11490 rtx temp = gen_reg_rtx (Pmode);
11491 rtx val = force_operand (XEXP (x, 0), temp);
11493 emit_move_insn (temp, val);
11495 XEXP (x, 0) = temp;
11503 /* Print an integer constant expression in assembler syntax. Addition
11504 and subtraction are the only arithmetic that may appear in these
11505 expressions. FILE is the stdio stream to write to, X is the rtx, and
11506 CODE is the operand print code from the output string. */
11509 output_pic_addr_const (FILE *file, rtx x, int code)
11513 switch (GET_CODE (x))
11516 gcc_assert (flag_pic);
11521 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11522 output_addr_const (file, x);
11525 const char *name = XSTR (x, 0);
11527 /* Mark the decl as referenced so that cgraph will
11528 output the function. */
11529 if (SYMBOL_REF_DECL (x))
11530 mark_decl_referenced (SYMBOL_REF_DECL (x));
11533 if (MACHOPIC_INDIRECT
11534 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11535 name = machopic_indirection_name (x, /*stub_p=*/true);
11537 assemble_name (file, name);
11539 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11540 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11541 fputs ("@PLT", file);
11548 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11549 assemble_name (asm_out_file, buf);
11553 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11557 /* This used to output parentheses around the expression,
11558 but that does not work on the 386 (either ATT or BSD assembler). */
11559 output_pic_addr_const (file, XEXP (x, 0), code);
11563 if (GET_MODE (x) == VOIDmode)
11565 /* We can use %d if the number is <32 bits and positive. */
11566 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11567 fprintf (file, "0x%lx%08lx",
11568 (unsigned long) CONST_DOUBLE_HIGH (x),
11569 (unsigned long) CONST_DOUBLE_LOW (x));
11571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11574 /* We can't handle floating point constants;
11575 TARGET_PRINT_OPERAND must handle them. */
11576 output_operand_lossage ("floating constant misused");
11580 /* Some assemblers need integer constants to appear first. */
11581 if (CONST_INT_P (XEXP (x, 0)))
11583 output_pic_addr_const (file, XEXP (x, 0), code);
11585 output_pic_addr_const (file, XEXP (x, 1), code);
11589 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11590 output_pic_addr_const (file, XEXP (x, 1), code);
11592 output_pic_addr_const (file, XEXP (x, 0), code);
11598 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11599 output_pic_addr_const (file, XEXP (x, 0), code);
11601 output_pic_addr_const (file, XEXP (x, 1), code);
11603 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11607 gcc_assert (XVECLEN (x, 0) == 1);
11608 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11609 switch (XINT (x, 1))
11612 fputs ("@GOT", file);
11614 case UNSPEC_GOTOFF:
11615 fputs ("@GOTOFF", file);
11617 case UNSPEC_PLTOFF:
11618 fputs ("@PLTOFF", file);
11620 case UNSPEC_GOTPCREL:
11621 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11622 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11624 case UNSPEC_GOTTPOFF:
11625 /* FIXME: This might be @TPOFF in Sun ld too. */
11626 fputs ("@gottpoff", file);
11629 fputs ("@tpoff", file);
11631 case UNSPEC_NTPOFF:
11633 fputs ("@tpoff", file);
11635 fputs ("@ntpoff", file);
11637 case UNSPEC_DTPOFF:
11638 fputs ("@dtpoff", file);
11640 case UNSPEC_GOTNTPOFF:
11642 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11643 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11645 fputs ("@gotntpoff", file);
11647 case UNSPEC_INDNTPOFF:
11648 fputs ("@indntpoff", file);
11651 case UNSPEC_MACHOPIC_OFFSET:
11653 machopic_output_function_base_name (file);
11657 output_operand_lossage ("invalid UNSPEC as operand");
11663 output_operand_lossage ("invalid expression as operand");
11667 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11668 We need to emit DTP-relative relocations. */
11670 static void ATTRIBUTE_UNUSED
11671 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11673 fputs (ASM_LONG, file);
11674 output_addr_const (file, x);
11675 fputs ("@dtpoff", file);
11681 fputs (", 0", file);
11684 gcc_unreachable ();
11688 /* Return true if X is a representation of the PIC register. This copes
11689 with calls from ix86_find_base_term, where the register might have
11690 been replaced by a cselib value. */
11693 ix86_pic_register_p (rtx x)
11695 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11696 return (pic_offset_table_rtx
11697 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11699 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11702 /* In the name of slightly smaller debug output, and to cater to
11703 general assembler lossage, recognize PIC+GOTOFF and turn it back
11704 into a direct symbol reference.
11706 On Darwin, this is necessary to avoid a crash, because Darwin
11707 has a different PIC label for each routine but the DWARF debugging
11708 information is not associated with any particular routine, so it's
11709 necessary to remove references to the PIC label from RTL stored by
11710 the DWARF output code. */
11713 ix86_delegitimize_address (rtx x)
11715 rtx orig_x = delegitimize_mem_from_attrs (x);
11716 /* addend is NULL or some rtx if x is something+GOTOFF where
11717 something doesn't include the PIC register. */
11718 rtx addend = NULL_RTX;
11719 /* reg_addend is NULL or a multiple of some register. */
11720 rtx reg_addend = NULL_RTX;
11721 /* const_addend is NULL or a const_int. */
11722 rtx const_addend = NULL_RTX;
11723 /* This is the result, or NULL. */
11724 rtx result = NULL_RTX;
11733 if (GET_CODE (x) != CONST
11734 || GET_CODE (XEXP (x, 0)) != UNSPEC
11735 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11736 || !MEM_P (orig_x))
11738 x = XVECEXP (XEXP (x, 0), 0, 0);
11739 if (GET_MODE (orig_x) != Pmode)
11740 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11744 if (GET_CODE (x) != PLUS
11745 || GET_CODE (XEXP (x, 1)) != CONST)
11748 if (ix86_pic_register_p (XEXP (x, 0)))
11749 /* %ebx + GOT/GOTOFF */
11751 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11753 /* %ebx + %reg * scale + GOT/GOTOFF */
11754 reg_addend = XEXP (x, 0);
11755 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11756 reg_addend = XEXP (reg_addend, 1);
11757 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11758 reg_addend = XEXP (reg_addend, 0);
11761 reg_addend = NULL_RTX;
11762 addend = XEXP (x, 0);
11766 addend = XEXP (x, 0);
11768 x = XEXP (XEXP (x, 1), 0);
11769 if (GET_CODE (x) == PLUS
11770 && CONST_INT_P (XEXP (x, 1)))
11772 const_addend = XEXP (x, 1);
11776 if (GET_CODE (x) == UNSPEC
11777 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11778 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11779 result = XVECEXP (x, 0, 0);
11781 if (TARGET_MACHO && darwin_local_data_pic (x)
11782 && !MEM_P (orig_x))
11783 result = XVECEXP (x, 0, 0);
11789 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11791 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11794 /* If the rest of original X doesn't involve the PIC register, add
11795 addend and subtract pic_offset_table_rtx. This can happen e.g.
11797 leal (%ebx, %ecx, 4), %ecx
11799 movl foo@GOTOFF(%ecx), %edx
11800 in which case we return (%ecx - %ebx) + foo. */
11801 if (pic_offset_table_rtx)
11802 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11803 pic_offset_table_rtx),
11808 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11809 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11813 /* If X is a machine specific address (i.e. a symbol or label being
11814 referenced as a displacement from the GOT implemented using an
11815 UNSPEC), then return the base term. Otherwise return X. */
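/* E.g. (an illustrative sketch) for
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))
   the base term is (symbol_ref "foo").  */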
11818 ix86_find_base_term (rtx x)
11824 if (GET_CODE (x) != CONST)
11826 term = XEXP (x, 0);
11827 if (GET_CODE (term) == PLUS
11828 && (CONST_INT_P (XEXP (term, 1))
11829 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11830 term = XEXP (term, 0);
11831 if (GET_CODE (term) != UNSPEC
11832 || XINT (term, 1) != UNSPEC_GOTPCREL)
11835 return XVECEXP (term, 0, 0);
11838 return ix86_delegitimize_address (x);
11842 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11843 int fp, FILE *file)
11845 const char *suffix;
11847 if (mode == CCFPmode || mode == CCFPUmode)
11849 code = ix86_fp_compare_code_to_integer (code);
11853 code = reverse_condition (code);
11904 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11908 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11909 Those same assemblers have the same but opposite lossage on cmov. */
11910 if (mode == CCmode)
11911 suffix = fp ? "nbe" : "a";
11912 else if (mode == CCCmode)
11915 gcc_unreachable ();
11931 gcc_unreachable ();
11935 gcc_assert (mode == CCmode || mode == CCCmode);
11952 gcc_unreachable ();
11956 /* ??? As above. */
11957 gcc_assert (mode == CCmode || mode == CCCmode);
11958 suffix = fp ? "nb" : "ae";
11961 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11965 /* ??? As above. */
11966 if (mode == CCmode)
11968 else if (mode == CCCmode)
11969 suffix = fp ? "nb" : "ae";
11971 gcc_unreachable ();
11974 suffix = fp ? "u" : "p";
11977 suffix = fp ? "nu" : "np";
11980 gcc_unreachable ();
11982 fputs (suffix, file);
11985 /* Print the name of register X to FILE based on its machine mode and number.
11986 If CODE is 'w', pretend the mode is HImode.
11987 If CODE is 'b', pretend the mode is QImode.
11988 If CODE is 'k', pretend the mode is SImode.
11989 If CODE is 'q', pretend the mode is DImode.
11990 If CODE is 'x', pretend the mode is V4SFmode.
11991 If CODE is 't', pretend the mode is V8SFmode.
11992 If CODE is 'h', pretend the reg is the 'high' byte register.
11993    If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
11994 If CODE is 'd', duplicate the operand for AVX instruction.
11998 print_reg (rtx x, int code, FILE *file)
12001 bool duplicated = code == 'd' && TARGET_AVX;
12003 gcc_assert (x == pc_rtx
12004 || (REGNO (x) != ARG_POINTER_REGNUM
12005 && REGNO (x) != FRAME_POINTER_REGNUM
12006 && REGNO (x) != FLAGS_REG
12007 && REGNO (x) != FPSR_REG
12008 && REGNO (x) != FPCR_REG));
12010 if (ASSEMBLER_DIALECT == ASM_ATT)
12015 gcc_assert (TARGET_64BIT);
12016 fputs ("rip", file);
12020 if (code == 'w' || MMX_REG_P (x))
12022 else if (code == 'b')
12024 else if (code == 'k')
12026 else if (code == 'q')
12028 else if (code == 'y')
12030 else if (code == 'h')
12032 else if (code == 'x')
12034 else if (code == 't')
12037 code = GET_MODE_SIZE (GET_MODE (x));
12039   /* Irritatingly, AMD extended registers use a different naming convention
12040 from the normal registers. */
12041 if (REX_INT_REG_P (x))
12043 gcc_assert (TARGET_64BIT);
12047 error ("extended registers have no high halves");
12050 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12053 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12056 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12059 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12062 error ("unsupported operand size for extended register");
12072 if (STACK_TOP_P (x))
12081 if (! ANY_FP_REG_P (x))
12082 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12087 reg = hi_reg_name[REGNO (x)];
12090 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12092 reg = qi_reg_name[REGNO (x)];
12095 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12097 reg = qi_high_reg_name[REGNO (x)];
12102 gcc_assert (!duplicated);
12104 fputs (hi_reg_name[REGNO (x)] + 1, file);
12109 gcc_unreachable ();
12115 if (ASSEMBLER_DIALECT == ASM_ATT)
12116 fprintf (file, ", %%%s", reg);
12118 fprintf (file, ", %s", reg);
12122 /* Locate some local-dynamic symbol still in use by this function
12123 so that we can print its name in some tls_local_dynamic_base
12127 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12131 if (GET_CODE (x) == SYMBOL_REF
12132 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12134 cfun->machine->some_ld_name = XSTR (x, 0);
12141 static const char *
12142 get_some_local_dynamic_name (void)
12146 if (cfun->machine->some_ld_name)
12147 return cfun->machine->some_ld_name;
12149 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12150 if (NONDEBUG_INSN_P (insn)
12151 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12152 return cfun->machine->some_ld_name;
12157 /* Meaning of CODE:
12158 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12159 C -- print opcode suffix for set/cmov insn.
12160 c -- like C, but print reversed condition
12161 F,f -- likewise, but for floating-point.
12162 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12164 R -- print the prefix for register names.
12165 z -- print the opcode suffix for the size of the current operand.
12166 Z -- likewise, with special suffixes for x87 instructions.
12167 * -- print a star (in certain assembler syntax)
12168 A -- print an absolute memory reference.
12169 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12170    s -- print a shift double count, followed by the assembler's argument
12172 b -- print the QImode name of the register for the indicated operand.
12173 %b0 would print %al if operands[0] is reg 0.
12174 w -- likewise, print the HImode name of the register.
12175 k -- likewise, print the SImode name of the register.
12176 q -- likewise, print the DImode name of the register.
12177 x -- likewise, print the V4SFmode name of the register.
12178 t -- likewise, print the V8SFmode name of the register.
12179 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12180 y -- print "st(0)" instead of "st" as a register.
12181 d -- print duplicated register operand for AVX instruction.
12182 D -- print condition for SSE cmp instruction.
12183 P -- if PIC, print an @PLT suffix.
12184 X -- don't print any sort of PIC '@' suffix for a symbol.
12185 & -- print some in-use local-dynamic symbol name.
12186 H -- print a memory address offset by 8; used for sse high-parts
12187 Y -- print condition for XOP pcom* instruction.
12188 + -- print a branch hint as 'cs' or 'ds' prefix
12189    ; -- print a semicolon (after prefixes due to a bug in older gas).
12193 ix86_print_operand (FILE *file, rtx x, int code)
12200 if (ASSEMBLER_DIALECT == ASM_ATT)
12206 const char *name = get_some_local_dynamic_name ();
12208 output_operand_lossage ("'%%&' used without any "
12209 "local dynamic TLS references");
12211 assemble_name (file, name);
12216 switch (ASSEMBLER_DIALECT)
12223 /* Intel syntax. For absolute addresses, registers should not
12224 be surrounded by braces. */
12228 ix86_print_operand (file, x, 0);
12235 gcc_unreachable ();
12238 ix86_print_operand (file, x, 0);
12243 if (ASSEMBLER_DIALECT == ASM_ATT)
12248 if (ASSEMBLER_DIALECT == ASM_ATT)
12253 if (ASSEMBLER_DIALECT == ASM_ATT)
12258 if (ASSEMBLER_DIALECT == ASM_ATT)
12263 if (ASSEMBLER_DIALECT == ASM_ATT)
12268 if (ASSEMBLER_DIALECT == ASM_ATT)
12273 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12275 /* Opcodes don't get size suffixes if using Intel opcodes. */
12276 if (ASSEMBLER_DIALECT == ASM_INTEL)
12279 switch (GET_MODE_SIZE (GET_MODE (x)))
12298 output_operand_lossage
12299 ("invalid operand size for operand code '%c'", code);
12304 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12306 (0, "non-integer operand used with operand code '%c'", code);
12310 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12311 if (ASSEMBLER_DIALECT == ASM_INTEL)
12314 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12316 switch (GET_MODE_SIZE (GET_MODE (x)))
12319 #ifdef HAVE_AS_IX86_FILDS
12329 #ifdef HAVE_AS_IX86_FILDQ
12332 fputs ("ll", file);
12340 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12342 /* 387 opcodes don't get size suffixes
12343 if the operands are registers. */
12344 if (STACK_REG_P (x))
12347 switch (GET_MODE_SIZE (GET_MODE (x)))
12368 output_operand_lossage
12369 ("invalid operand type used with operand code '%c'", code);
12373 output_operand_lossage
12374 ("invalid operand size for operand code '%c'", code);
12391 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12393 ix86_print_operand (file, x, 0);
12394 fputs (", ", file);
12399       /* A little bit of braindamage here.  The SSE compare instructions
12400 	 use completely different names for the comparisons than the
12401 fp conditional moves. */
12404 switch (GET_CODE (x))
12407 fputs ("eq", file);
12410 fputs ("eq_us", file);
12413 fputs ("lt", file);
12416 fputs ("nge", file);
12419 fputs ("le", file);
12422 fputs ("ngt", file);
12425 fputs ("unord", file);
12428 fputs ("neq", file);
12431 fputs ("neq_oq", file);
12434 fputs ("ge", file);
12437 fputs ("nlt", file);
12440 fputs ("gt", file);
12443 fputs ("nle", file);
12446 fputs ("ord", file);
12449 output_operand_lossage ("operand is not a condition code, "
12450 "invalid operand code 'D'");
12456 switch (GET_CODE (x))
12460 fputs ("eq", file);
12464 fputs ("lt", file);
12468 fputs ("le", file);
12471 fputs ("unord", file);
12475 fputs ("neq", file);
12479 fputs ("nlt", file);
12483 fputs ("nle", file);
12486 fputs ("ord", file);
12489 output_operand_lossage ("operand is not a condition code, "
12490 "invalid operand code 'D'");
12496 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12497 if (ASSEMBLER_DIALECT == ASM_ATT)
12499 switch (GET_MODE (x))
12501 case HImode: putc ('w', file); break;
12503 case SFmode: putc ('l', file); break;
12505 case DFmode: putc ('q', file); break;
12506 default: gcc_unreachable ();
12513 if (!COMPARISON_P (x))
12515 output_operand_lossage ("operand is neither a constant nor a "
12516 "condition code, invalid operand code "
12520 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12523 if (!COMPARISON_P (x))
12525 output_operand_lossage ("operand is neither a constant nor a "
12526 "condition code, invalid operand code "
12530 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12531 if (ASSEMBLER_DIALECT == ASM_ATT)
12534 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12537 /* Like above, but reverse condition */
12539 /* Check to see if argument to %c is really a constant
12540 and not a condition code which needs to be reversed. */
12541 if (!COMPARISON_P (x))
12543 output_operand_lossage ("operand is neither a constant nor a "
12544 "condition code, invalid operand "
12548 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12551 if (!COMPARISON_P (x))
12553 output_operand_lossage ("operand is neither a constant nor a "
12554 "condition code, invalid operand "
12558 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12559 if (ASSEMBLER_DIALECT == ASM_ATT)
12562 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12566 /* It doesn't actually matter what mode we use here, as we're
12567 only going to use this for printing. */
12568 x = adjust_address_nv (x, DImode, 8);
12576 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12579 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12582 int pred_val = INTVAL (XEXP (x, 0));
12584 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12585 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12587 int taken = pred_val > REG_BR_PROB_BASE / 2;
12588 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12590 	      /* Emit hints only where the default branch prediction
12591 		 heuristics would fail. */
12592 if (taken != cputaken)
12594 /* We use 3e (DS) prefix for taken branches and
12595 2e (CS) prefix for not taken branches. */
12597 fputs ("ds ; ", file);
12599 fputs ("cs ; ", file);
12607 switch (GET_CODE (x))
12610 fputs ("neq", file);
12613 fputs ("eq", file);
12617 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12621 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12625 fputs ("le", file);
12629 fputs ("lt", file);
12632 fputs ("unord", file);
12635 fputs ("ord", file);
12638 fputs ("ueq", file);
12641 fputs ("nlt", file);
12644 fputs ("nle", file);
12647 fputs ("ule", file);
12650 fputs ("ult", file);
12653 fputs ("une", file);
12656 output_operand_lossage ("operand is not a condition code, "
12657 "invalid operand code 'Y'");
12663 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12669 output_operand_lossage ("invalid operand code '%c'", code);
12674 print_reg (x, code, file);
12676 else if (MEM_P (x))
12678 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12679 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12680 && GET_MODE (x) != BLKmode)
12683 switch (GET_MODE_SIZE (GET_MODE (x)))
12685 case 1: size = "BYTE"; break;
12686 case 2: size = "WORD"; break;
12687 case 4: size = "DWORD"; break;
12688 case 8: size = "QWORD"; break;
12689 case 12: size = "TBYTE"; break;
12691 if (GET_MODE (x) == XFmode)
12696 case 32: size = "YMMWORD"; break;
12698 gcc_unreachable ();
12701 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12704 else if (code == 'w')
12706 else if (code == 'k')
12709 fputs (size, file);
12710 fputs (" PTR ", file);
12714 /* Avoid (%rip) for call operands. */
12715 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12716 && !CONST_INT_P (x))
12717 output_addr_const (file, x);
12718 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12719 output_operand_lossage ("invalid constraints for operand");
12721 output_address (x);
12724 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12729 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12730 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12732 if (ASSEMBLER_DIALECT == ASM_ATT)
12734 fprintf (file, "0x%08lx", (long unsigned int) l);
12737 /* These float cases don't actually occur as immediate operands. */
12738 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12742 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12743 fputs (dstr, file);
12746 else if (GET_CODE (x) == CONST_DOUBLE
12747 && GET_MODE (x) == XFmode)
12751 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12752 fputs (dstr, file);
12757 /* We have patterns that allow zero sets of memory, for instance.
12758 In 64-bit mode, we should probably support all 8-byte vectors,
12759 since we can in fact encode that into an immediate. */
12760 if (GET_CODE (x) == CONST_VECTOR)
12762 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12768 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12770 if (ASSEMBLER_DIALECT == ASM_ATT)
12773 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12774 || GET_CODE (x) == LABEL_REF)
12776 if (ASSEMBLER_DIALECT == ASM_ATT)
12779 fputs ("OFFSET FLAT:", file);
12782 if (CONST_INT_P (x))
12783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12785 output_pic_addr_const (file, x, code);
12787 output_addr_const (file, x);
12792 ix86_print_operand_punct_valid_p (unsigned char code)
12794 return (code == '*' || code == '+' || code == '&' || code == ';');
12797 /* Print a memory operand whose address is ADDR. */
12800 ix86_print_operand_address (FILE *file, rtx addr)
12802 struct ix86_address parts;
12803 rtx base, index, disp;
12805 int ok = ix86_decompose_address (addr, &parts);
12810 index = parts.index;
12812 scale = parts.scale;
12820 if (ASSEMBLER_DIALECT == ASM_ATT)
12822 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12825 gcc_unreachable ();
12828 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
12829 if (TARGET_64BIT && !base && !index)
12833 if (GET_CODE (disp) == CONST
12834 && GET_CODE (XEXP (disp, 0)) == PLUS
12835 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12836 symbol = XEXP (XEXP (disp, 0), 0);
12838 if (GET_CODE (symbol) == LABEL_REF
12839 || (GET_CODE (symbol) == SYMBOL_REF
12840 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12843 if (!base && !index)
12845 /* Displacement only requires special attention. */
12847 if (CONST_INT_P (disp))
12849 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12850 fputs ("ds:", file);
12851 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12854 output_pic_addr_const (file, disp, 0);
12856 output_addr_const (file, disp);
12860 if (ASSEMBLER_DIALECT == ASM_ATT)
12865 output_pic_addr_const (file, disp, 0);
12866 else if (GET_CODE (disp) == LABEL_REF)
12867 output_asm_label (disp);
12869 output_addr_const (file, disp);
12874 print_reg (base, 0, file);
12878 print_reg (index, 0, file);
12880 fprintf (file, ",%d", scale);
12886 rtx offset = NULL_RTX;
12890 /* Pull out the offset of a symbol; print any symbol itself. */
12891 if (GET_CODE (disp) == CONST
12892 && GET_CODE (XEXP (disp, 0)) == PLUS
12893 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12895 offset = XEXP (XEXP (disp, 0), 1);
12896 disp = gen_rtx_CONST (VOIDmode,
12897 XEXP (XEXP (disp, 0), 0));
12901 output_pic_addr_const (file, disp, 0);
12902 else if (GET_CODE (disp) == LABEL_REF)
12903 output_asm_label (disp);
12904 else if (CONST_INT_P (disp))
12907 output_addr_const (file, disp);
12913 print_reg (base, 0, file);
12916 if (INTVAL (offset) >= 0)
12918 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12922 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12929 print_reg (index, 0, file);
12931 fprintf (file, "*%d", scale);
12939 output_addr_const_extra (FILE *file, rtx x)
12943 if (GET_CODE (x) != UNSPEC)
12946 op = XVECEXP (x, 0, 0);
12947 switch (XINT (x, 1))
12949 case UNSPEC_GOTTPOFF:
12950 output_addr_const (file, op);
12951 /* FIXME: This might be @TPOFF in Sun ld. */
12952 fputs ("@gottpoff", file);
12955 output_addr_const (file, op);
12956 fputs ("@tpoff", file);
12958 case UNSPEC_NTPOFF:
12959 output_addr_const (file, op);
12961 fputs ("@tpoff", file);
12963 fputs ("@ntpoff", file);
12965 case UNSPEC_DTPOFF:
12966 output_addr_const (file, op);
12967 fputs ("@dtpoff", file);
12969 case UNSPEC_GOTNTPOFF:
12970 output_addr_const (file, op);
12972 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12973 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12975 fputs ("@gotntpoff", file);
12977 case UNSPEC_INDNTPOFF:
12978 output_addr_const (file, op);
12979 fputs ("@indntpoff", file);
12982 case UNSPEC_MACHOPIC_OFFSET:
12983 output_addr_const (file, op);
12985 machopic_output_function_base_name (file);
12996 /* Split one or more DImode RTL references into pairs of SImode
12997 references. The RTL can be REG, offsettable MEM, integer constant, or
12998 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12999 split and "num" is its length. lo_half and hi_half are output arrays
13000 that parallel "operands". */
13003 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13007 rtx op = operands[num];
13009 /* simplify_subreg refuses to split volatile memory addresses,
13010 but we still have to handle them. */
13013 lo_half[num] = adjust_address (op, SImode, 0);
13014 hi_half[num] = adjust_address (op, SImode, 4);
13018 lo_half[num] = simplify_gen_subreg (SImode, op,
13019 GET_MODE (op) == VOIDmode
13020 ? DImode : GET_MODE (op), 0);
13021 hi_half[num] = simplify_gen_subreg (SImode, op,
13022 GET_MODE (op) == VOIDmode
13023 ? DImode : GET_MODE (op), 4);
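/* Illustrative example (not emitted code): on this little-endian target,
   splitting the DImode constant 0x100000002 yields lo_half = 0x2 and
   hi_half = 0x1, while an offsettable MEM is split into the same MEM at
   byte offsets 0 and 4.  */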
13027 /* Split one or more TImode RTL references into pairs of DImode
13028 references. The RTL can be REG, offsettable MEM, integer constant, or
13029 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13030 split and "num" is its length. lo_half and hi_half are output arrays
13031 that parallel "operands". */
13034 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13038 rtx op = operands[num];
13040 /* simplify_subreg refuses to split volatile memory addresses, but we
13041 still have to handle them. */
13044 lo_half[num] = adjust_address (op, DImode, 0);
13045 hi_half[num] = adjust_address (op, DImode, 8);
13049 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13050 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13055 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13056 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13057 is the expression of the binary operation. The output may either be
13058 emitted here, or returned to the caller, like all output_* functions.
13060 There is no guarantee that the operands are the same mode, as they
13061 might be within FLOAT or FLOAT_EXTEND expressions. */
13063 #ifndef SYSV386_COMPAT
13064 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13065 wants to fix the assemblers because that causes incompatibility
13066 with gcc. No-one wants to fix gcc because that causes
13067 incompatibility with assemblers... You can use the option of
13068 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13069 #define SYSV386_COMPAT 1
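/* A sketch of the discrepancy described above (assuming AT&T mnemonics):
   on a SYSV386_COMPAT assembler, "fsub %st, %st(1)" assembles to what
   the Intel manual documents as "fsubr st(1), st", and vice versa,
   whenever the destination is not %st; only !SYSV386_COMPAT output
   matches the hardware's documented behavior.  */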
13073 output_387_binary_op (rtx insn, rtx *operands)
13075 static char buf[40];
13078 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13080 #ifdef ENABLE_CHECKING
13081 /* Even if we do not want to check the inputs, this documents the
13082 input constraints, which helps in understanding the following code. */
13083 if (STACK_REG_P (operands[0])
13084 && ((REG_P (operands[1])
13085 && REGNO (operands[0]) == REGNO (operands[1])
13086 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13087 || (REG_P (operands[2])
13088 && REGNO (operands[0]) == REGNO (operands[2])
13089 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13090 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13093 gcc_assert (is_sse);
13096 switch (GET_CODE (operands[3]))
13099 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13100 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13108 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13109 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13117 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13118 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13126 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13127 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13135 gcc_unreachable ();
13142 strcpy (buf, ssep);
13143 if (GET_MODE (operands[0]) == SFmode)
13144 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13146 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13150 strcpy (buf, ssep + 1);
13151 if (GET_MODE (operands[0]) == SFmode)
13152 strcat (buf, "ss\t{%2, %0|%0, %2}");
13154 strcat (buf, "sd\t{%2, %0|%0, %2}");
13160 switch (GET_CODE (operands[3]))
13164 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13166 rtx temp = operands[2];
13167 operands[2] = operands[1];
13168 operands[1] = temp;
13171 /* We now know operands[0] == operands[1]. */
13173 if (MEM_P (operands[2]))
13179 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13181 if (STACK_TOP_P (operands[0]))
13182 /* How is it that we are storing to a dead operand[2]?
13183 Well, presumably operands[1] is dead too. We can't
13184 store the result to st(0) as st(0) gets popped on this
13185 instruction. Instead store to operands[2] (which I
13186 think has to be st(1)). st(1) will be popped later.
13187 gcc <= 2.8.1 didn't have this check and generated
13188 assembly code that the Unixware assembler rejected. */
13189 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13191 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13195 if (STACK_TOP_P (operands[0]))
13196 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13198 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13203 if (MEM_P (operands[1]))
13209 if (MEM_P (operands[2]))
13215 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13218 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13219 derived assemblers, confusingly reverse the direction of
13220 the operation for fsub{r} and fdiv{r} when the
13221 destination register is not st(0). The Intel assembler
13222 doesn't have this brain damage. Read !SYSV386_COMPAT to
13223 figure out what the hardware really does. */
13224 if (STACK_TOP_P (operands[0]))
13225 p = "{p\t%0, %2|rp\t%2, %0}";
13227 p = "{rp\t%2, %0|p\t%0, %2}";
13229 if (STACK_TOP_P (operands[0]))
13230 /* As above for fmul/fadd, we can't store to st(0). */
13231 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13233 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13238 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13241 if (STACK_TOP_P (operands[0]))
13242 p = "{rp\t%0, %1|p\t%1, %0}";
13244 p = "{p\t%1, %0|rp\t%0, %1}";
13246 if (STACK_TOP_P (operands[0]))
13247 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13249 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13254 if (STACK_TOP_P (operands[0]))
13256 if (STACK_TOP_P (operands[1]))
13257 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13259 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13262 else if (STACK_TOP_P (operands[1]))
13265 p = "{\t%1, %0|r\t%0, %1}";
13267 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13273 p = "{r\t%2, %0|\t%0, %2}";
13275 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13281 gcc_unreachable ();
13288 /* Return needed mode for entity in optimize_mode_switching pass. */
13291 ix86_mode_needed (int entity, rtx insn)
13293 enum attr_i387_cw mode;
13295 /* The mode UNINITIALIZED is used to store the control word after a
13296 function call or ASM pattern. The mode ANY specifies that the function
13297 has no requirements on the control word and makes no changes to the
13298 bits we are interested in. */
13301 || (NONJUMP_INSN_P (insn)
13302 && (asm_noperands (PATTERN (insn)) >= 0
13303 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13304 return I387_CW_UNINITIALIZED;
13306 if (recog_memoized (insn) < 0)
13307 return I387_CW_ANY;
13309 mode = get_attr_i387_cw (insn);
13314 if (mode == I387_CW_TRUNC)
13319 if (mode == I387_CW_FLOOR)
13324 if (mode == I387_CW_CEIL)
13329 if (mode == I387_CW_MASK_PM)
13334 gcc_unreachable ();
13337 return I387_CW_ANY;
13340 /* Output code to initialize control word copies used by trunc?f?i and
13341 rounding patterns. CURRENT_MODE is set to current control word,
13342 while NEW_MODE is set to new control word. */
13345 emit_i387_cw_initialization (int mode)
13347 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13350 enum ix86_stack_slot slot;
13352 rtx reg = gen_reg_rtx (HImode);
13354 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13355 emit_move_insn (reg, copy_rtx (stored_mode));
13357 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13358 || optimize_function_for_size_p (cfun))
13362 case I387_CW_TRUNC:
13363 /* round toward zero (truncate) */
13364 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13365 slot = SLOT_CW_TRUNC;
13368 case I387_CW_FLOOR:
13369 /* round down toward -oo */
13370 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13371 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13372 slot = SLOT_CW_FLOOR;
13376 /* round up toward +oo */
13377 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13378 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13379 slot = SLOT_CW_CEIL;
13382 case I387_CW_MASK_PM:
13383 /* mask precision exception for nearbyint() */
13384 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13385 slot = SLOT_CW_MASK_PM;
13389 gcc_unreachable ();
13396 case I387_CW_TRUNC:
13397 /* round toward zero (truncate) */
13398 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13399 slot = SLOT_CW_TRUNC;
13402 case I387_CW_FLOOR:
13403 /* round down toward -oo */
13404 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13405 slot = SLOT_CW_FLOOR;
13409 /* round up toward +oo */
13410 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13411 slot = SLOT_CW_CEIL;
13414 case I387_CW_MASK_PM:
13415 /* mask precision exception for nearbyint() */
13416 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13417 slot = SLOT_CW_MASK_PM;
13421 gcc_unreachable ();
13425 gcc_assert (slot < MAX_386_STACK_LOCALS);
13427 new_mode = assign_386_stack_local (HImode, slot);
13428 emit_move_insn (new_mode, reg);
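/* Illustrative sketch (not emitted code): the control-word edits above,
   expressed as plain C on a 16-bit cw value, where bits 11:10 are the
   x87 rounding control and bit 5 is the precision-exception mask:
     cw_trunc   = cw | 0x0c00;               round toward zero
     cw_floor   = (cw & ~0x0c00) | 0x0400;   round toward -inf
     cw_ceil    = (cw & ~0x0c00) | 0x0800;   round toward +inf
     cw_mask_pm = cw | 0x0020;               mask the precision exception  */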
13431 /* Output code for INSN to convert a float to a signed int. OPERANDS
13432 are the insn operands. The output may be [HSD]Imode and the input
13433 operand may be [SDX]Fmode. */
13436 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13438 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13439 int dimode_p = GET_MODE (operands[0]) == DImode;
13440 int round_mode = get_attr_i387_cw (insn);
13442 /* Jump through a hoop or two for DImode, since the hardware has no
13443 non-popping instruction. We used to do this a different way, but
13444 that was somewhat fragile and broke with post-reload splitters. */
13445 if ((dimode_p || fisttp) && !stack_top_dies)
13446 output_asm_insn ("fld\t%y1", operands);
13448 gcc_assert (STACK_TOP_P (operands[1]));
13449 gcc_assert (MEM_P (operands[0]));
13450 gcc_assert (GET_MODE (operands[1]) != TFmode);
13453 output_asm_insn ("fisttp%Z0\t%0", operands);
13456 if (round_mode != I387_CW_ANY)
13457 output_asm_insn ("fldcw\t%3", operands);
13458 if (stack_top_dies || dimode_p)
13459 output_asm_insn ("fistp%Z0\t%0", operands);
13461 output_asm_insn ("fist%Z0\t%0", operands);
13462 if (round_mode != I387_CW_ANY)
13463 output_asm_insn ("fldcw\t%2", operands);
13469 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13470 have the values zero or one, indicates the ffreep insn's operand
13471 from the OPERANDS array. */
13473 static const char *
13474 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13476 if (TARGET_USE_FFREEP)
13477 #ifdef HAVE_AS_IX86_FFREEP
13478 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13481 static char retval[32];
13482 int regno = REGNO (operands[opno]);
13484 gcc_assert (FP_REGNO_P (regno));
13486 regno -= FIRST_STACK_REG;
13488 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
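/* The .word emitted above hand-encodes ffreep %st(N): the value
   0xc0df + (N << 8), stored little-endian, yields the byte pair
   0xdf 0xc0+N, which is the ffreep opcode.  */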
13493 return opno ? "fstp\t%y1" : "fstp\t%y0";
13497 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13498 should be used. UNORDERED_P is true when fucom should be used. */
13501 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13503 int stack_top_dies;
13504 rtx cmp_op0, cmp_op1;
13505 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13509 cmp_op0 = operands[0];
13510 cmp_op1 = operands[1];
13514 cmp_op0 = operands[1];
13515 cmp_op1 = operands[2];
13520 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13521 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13522 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13523 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13525 if (GET_MODE (operands[0]) == SFmode)
13527 return &ucomiss[TARGET_AVX ? 0 : 1];
13529 return &comiss[TARGET_AVX ? 0 : 1];
13532 return &ucomisd[TARGET_AVX ? 0 : 1];
13534 return &comisd[TARGET_AVX ? 0 : 1];
13537 gcc_assert (STACK_TOP_P (cmp_op0));
13539 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13541 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13543 if (stack_top_dies)
13545 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13546 return output_387_ffreep (operands, 1);
13549 return "ftst\n\tfnstsw\t%0";
13552 if (STACK_REG_P (cmp_op1)
13554 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13555 && REGNO (cmp_op1) != FIRST_STACK_REG)
13557 /* If the top of the 387 stack dies, and the other operand
13558 is also a stack register that dies, then this must be a
13559 `fcompp' float compare. */
13563 /* There is no double popping fcomi variant. Fortunately,
13564 eflags is immune from the fstp's cc clobbering. */
13566 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13568 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13569 return output_387_ffreep (operands, 0);
13574 return "fucompp\n\tfnstsw\t%0";
13576 return "fcompp\n\tfnstsw\t%0";
13581 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13583 static const char * const alt[16] =
13585 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13586 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13587 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13588 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13590 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13591 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13595 "fcomi\t{%y1, %0|%0, %y1}",
13596 "fcomip\t{%y1, %0|%0, %y1}",
13597 "fucomi\t{%y1, %0|%0, %y1}",
13598 "fucomip\t{%y1, %0|%0, %y1}",
13609 mask = eflags_p << 3;
13610 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13611 mask |= unordered_p << 1;
13612 mask |= stack_top_dies;
13614 gcc_assert (mask < 16);
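/* Worked example of the mask encoding: fcomi requested (eflags_p = 1),
   FP operand (intmode = 0), unordered (unordered_p = 1), stack top dies
   (stack_top_dies = 1) gives mask = 8 | 0 | 2 | 1 = 11, selecting
   "fucomip" from the table above.  */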
13623 ix86_output_addr_vec_elt (FILE *file, int value)
13625 const char *directive = ASM_LONG;
13629 directive = ASM_QUAD;
13631 gcc_assert (!TARGET_64BIT);
13634 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13638 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13640 const char *directive = ASM_LONG;
13643 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13644 directive = ASM_QUAD;
13646 gcc_assert (!TARGET_64BIT);
13648 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13649 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13650 fprintf (file, "%s%s%d-%s%d\n",
13651 directive, LPREFIX, value, LPREFIX, rel);
13652 else if (HAVE_AS_GOTOFF_IN_DATA)
13653 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13655 else if (TARGET_MACHO)
13657 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13658 machopic_output_function_base_name (file);
13663 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13664 GOT_SYMBOL_NAME, LPREFIX, value);
13667 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13671 ix86_expand_clear (rtx dest)
13675 /* We play register width games, which are only valid after reload. */
13676 gcc_assert (reload_completed);
13678 /* Avoid HImode and its attendant prefix byte. */
13679 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13680 dest = gen_rtx_REG (SImode, REGNO (dest));
13681 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13683 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13684 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13686 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13687 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
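/* Size rationale (illustrative): "xorl %eax, %eax" is 2 bytes while
   "movl $0, %eax" is 5, but the xor form clobbers the flags, hence the
   CLOBBER attached to the PARALLEL above.  */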
13693 /* X is an unchanging MEM. If it is a constant pool reference, return
13694 the constant pool rtx, else NULL. */
13697 maybe_get_pool_constant (rtx x)
13699 x = ix86_delegitimize_address (XEXP (x, 0));
13701 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13702 return get_pool_constant (x);
13708 ix86_expand_move (enum machine_mode mode, rtx operands[])
13711 enum tls_model model;
13716 if (GET_CODE (op1) == SYMBOL_REF)
13718 model = SYMBOL_REF_TLS_MODEL (op1);
13721 op1 = legitimize_tls_address (op1, model, true);
13722 op1 = force_operand (op1, op0);
13726 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13727 && SYMBOL_REF_DLLIMPORT_P (op1))
13728 op1 = legitimize_dllimport_symbol (op1, false);
13730 else if (GET_CODE (op1) == CONST
13731 && GET_CODE (XEXP (op1, 0)) == PLUS
13732 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13734 rtx addend = XEXP (XEXP (op1, 0), 1);
13735 rtx symbol = XEXP (XEXP (op1, 0), 0);
13738 model = SYMBOL_REF_TLS_MODEL (symbol);
13740 tmp = legitimize_tls_address (symbol, model, true);
13741 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13742 && SYMBOL_REF_DLLIMPORT_P (symbol))
13743 tmp = legitimize_dllimport_symbol (symbol, true);
13747 tmp = force_operand (tmp, NULL);
13748 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13749 op0, 1, OPTAB_DIRECT);
13755 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13757 if (TARGET_MACHO && !TARGET_64BIT)
13762 rtx temp = ((reload_in_progress
13763 || ((op0 && REG_P (op0))
13765 ? op0 : gen_reg_rtx (Pmode));
13766 op1 = machopic_indirect_data_reference (op1, temp);
13767 op1 = machopic_legitimize_pic_address (op1, mode,
13768 temp == op1 ? 0 : temp);
13770 else if (MACHOPIC_INDIRECT)
13771 op1 = machopic_indirect_data_reference (op1, 0);
13779 op1 = force_reg (Pmode, op1);
13780 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13782 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13783 op1 = legitimize_pic_address (op1, reg);
13792 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13793 || !push_operand (op0, mode))
13795 op1 = force_reg (mode, op1);
13797 if (push_operand (op0, mode)
13798 && ! general_no_elim_operand (op1, mode))
13799 op1 = copy_to_mode_reg (mode, op1);
13801 /* Force large constants in 64-bit compilation into a register
13802 so that they get CSEd. */
13803 if (can_create_pseudo_p ()
13804 && (mode == DImode) && TARGET_64BIT
13805 && immediate_operand (op1, mode)
13806 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13807 && !register_operand (op0, mode)
13809 op1 = copy_to_mode_reg (mode, op1);
13811 if (can_create_pseudo_p ()
13812 && FLOAT_MODE_P (mode)
13813 && GET_CODE (op1) == CONST_DOUBLE)
13815 /* If we are loading a floating point constant to a register,
13816 force the value to memory now, since we'll get better code
13817 out the back end. */
13819 op1 = validize_mem (force_const_mem (mode, op1));
13820 if (!register_operand (op0, mode))
13822 rtx temp = gen_reg_rtx (mode);
13823 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13824 emit_move_insn (op0, temp);
13830 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13834 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13836 rtx op0 = operands[0], op1 = operands[1];
13837 unsigned int align = GET_MODE_ALIGNMENT (mode);
13839 /* Force constants other than zero into memory. We do not know how
13840 the instructions used to build constants modify the upper 64 bits
13841 of the register; once we have that information we may be able
13842 to handle some of them more efficiently. */
13843 if (can_create_pseudo_p ()
13844 && register_operand (op0, mode)
13845 && (CONSTANT_P (op1)
13846 || (GET_CODE (op1) == SUBREG
13847 && CONSTANT_P (SUBREG_REG (op1))))
13848 && !standard_sse_constant_p (op1))
13849 op1 = validize_mem (force_const_mem (mode, op1));
13851 /* We need to check memory alignment for SSE modes since attributes
13852 can make operands unaligned. */
13853 if (can_create_pseudo_p ()
13854 && SSE_REG_MODE_P (mode)
13855 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13856 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13860 /* ix86_expand_vector_move_misalign() does not like constants ... */
13861 if (CONSTANT_P (op1)
13862 || (GET_CODE (op1) == SUBREG
13863 && CONSTANT_P (SUBREG_REG (op1))))
13864 op1 = validize_mem (force_const_mem (mode, op1));
13866 /* ... nor both arguments in memory. */
13867 if (!register_operand (op0, mode)
13868 && !register_operand (op1, mode))
13869 op1 = force_reg (mode, op1);
13871 tmp[0] = op0; tmp[1] = op1;
13872 ix86_expand_vector_move_misalign (mode, tmp);
13876 /* Make operand1 a register if it isn't already. */
13877 if (can_create_pseudo_p ()
13878 && !register_operand (op0, mode)
13879 && !register_operand (op1, mode))
13881 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13885 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13888 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13889 straight to ix86_expand_vector_move. */
13890 /* Code generation for scalar reg-reg moves of single and double precision data:
13891 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13895 if (x86_sse_partial_reg_dependency == true)
13900 Code generation for scalar loads of double precision data:
13901 if (x86_sse_split_regs == true)
13902 movlpd mem, reg (gas syntax)
13906 Code generation for unaligned packed loads of single precision data
13907 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13908 if (x86_sse_unaligned_move_optimal)
13911 if (x86_sse_partial_reg_dependency == true)
13923 Code generation for unaligned packed loads of double precision data
13924 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13925 if (x86_sse_unaligned_move_optimal)
13928 if (x86_sse_split_regs == true)
13941 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13950 switch (GET_MODE_CLASS (mode))
13952 case MODE_VECTOR_INT:
13954 switch (GET_MODE_SIZE (mode))
13957 /* If we're optimizing for size, movups is the smallest. */
13958 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13960 op0 = gen_lowpart (V4SFmode, op0);
13961 op1 = gen_lowpart (V4SFmode, op1);
13962 emit_insn (gen_avx_movups (op0, op1));
13965 op0 = gen_lowpart (V16QImode, op0);
13966 op1 = gen_lowpart (V16QImode, op1);
13967 emit_insn (gen_avx_movdqu (op0, op1));
13970 op0 = gen_lowpart (V32QImode, op0);
13971 op1 = gen_lowpart (V32QImode, op1);
13972 emit_insn (gen_avx_movdqu256 (op0, op1));
13975 gcc_unreachable ();
13978 case MODE_VECTOR_FLOAT:
13979 op0 = gen_lowpart (mode, op0);
13980 op1 = gen_lowpart (mode, op1);
13985 emit_insn (gen_avx_movups (op0, op1));
13988 emit_insn (gen_avx_movups256 (op0, op1));
13991 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13993 op0 = gen_lowpart (V4SFmode, op0);
13994 op1 = gen_lowpart (V4SFmode, op1);
13995 emit_insn (gen_avx_movups (op0, op1));
13998 emit_insn (gen_avx_movupd (op0, op1));
14001 emit_insn (gen_avx_movupd256 (op0, op1));
14004 gcc_unreachable ();
14009 gcc_unreachable ();
14017 /* If we're optimizing for size, movups is the smallest. */
14018 if (optimize_insn_for_size_p ()
14019 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14021 op0 = gen_lowpart (V4SFmode, op0);
14022 op1 = gen_lowpart (V4SFmode, op1);
14023 emit_insn (gen_sse_movups (op0, op1));
14027 /* ??? If we have typed data, then it would appear that using
14028 movdqu is the only way to get unaligned data loaded with
14029 integer registers. */
14030 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14032 op0 = gen_lowpart (V16QImode, op0);
14033 op1 = gen_lowpart (V16QImode, op1);
14034 emit_insn (gen_sse2_movdqu (op0, op1));
14038 if (TARGET_SSE2 && mode == V2DFmode)
14042 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14044 op0 = gen_lowpart (V2DFmode, op0);
14045 op1 = gen_lowpart (V2DFmode, op1);
14046 emit_insn (gen_sse2_movupd (op0, op1));
14050 /* When SSE registers are split into halves, we can avoid
14051 writing to the top half twice. */
14052 if (TARGET_SSE_SPLIT_REGS)
14054 emit_clobber (op0);
14059 /* ??? Not sure about the best option for the Intel chips.
14060 The following would seem to satisfy; the register is
14061 entirely cleared, breaking the dependency chain. We
14062 then store to the upper half, with a dependency depth
14063 of one. A rumor has it that Intel recommends two movsd
14064 followed by an unpacklpd, but this is unconfirmed. And
14065 given that the dependency depth of the unpacklpd would
14066 still be one, I'm not sure why this would be better. */
14067 zero = CONST0_RTX (V2DFmode);
14070 m = adjust_address (op1, DFmode, 0);
14071 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14072 m = adjust_address (op1, DFmode, 8);
14073 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14077 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14079 op0 = gen_lowpart (V4SFmode, op0);
14080 op1 = gen_lowpart (V4SFmode, op1);
14081 emit_insn (gen_sse_movups (op0, op1));
14085 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14086 emit_move_insn (op0, CONST0_RTX (mode));
14088 emit_clobber (op0);
14090 if (mode != V4SFmode)
14091 op0 = gen_lowpart (V4SFmode, op0);
14092 m = adjust_address (op1, V2SFmode, 0);
14093 emit_insn (gen_sse_loadlps (op0, op0, m));
14094 m = adjust_address (op1, V2SFmode, 8);
14095 emit_insn (gen_sse_loadhps (op0, op0, m));
14098 else if (MEM_P (op0))
14100 /* If we're optimizing for size, movups is the smallest. */
14101 if (optimize_insn_for_size_p ()
14102 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14104 op0 = gen_lowpart (V4SFmode, op0);
14105 op1 = gen_lowpart (V4SFmode, op1);
14106 emit_insn (gen_sse_movups (op0, op1));
14110 /* ??? Similar to above, only less clear because of
14111 "typeless" stores. */
14112 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14113 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14115 op0 = gen_lowpart (V16QImode, op0);
14116 op1 = gen_lowpart (V16QImode, op1);
14117 emit_insn (gen_sse2_movdqu (op0, op1));
14121 if (TARGET_SSE2 && mode == V2DFmode)
14123 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14125 op0 = gen_lowpart (V2DFmode, op0);
14126 op1 = gen_lowpart (V2DFmode, op1);
14127 emit_insn (gen_sse2_movupd (op0, op1));
14131 m = adjust_address (op0, DFmode, 0);
14132 emit_insn (gen_sse2_storelpd (m, op1));
14133 m = adjust_address (op0, DFmode, 8);
14134 emit_insn (gen_sse2_storehpd (m, op1));
14139 if (mode != V4SFmode)
14140 op1 = gen_lowpart (V4SFmode, op1);
14142 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14144 op0 = gen_lowpart (V4SFmode, op0);
14145 emit_insn (gen_sse_movups (op0, op1));
14149 m = adjust_address (op0, V2SFmode, 0);
14150 emit_insn (gen_sse_storelps (m, op1));
14151 m = adjust_address (op0, V2SFmode, 8);
14152 emit_insn (gen_sse_storehps (m, op1));
14157 gcc_unreachable ();
14160 /* Expand a push in MODE. This is some mode for which we do not support
14161 proper push instructions, at least from the registers that we expect
14162 the value to live in. */
14165 ix86_expand_push (enum machine_mode mode, rtx x)
14169 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14170 GEN_INT (-GET_MODE_SIZE (mode)),
14171 stack_pointer_rtx, 1, OPTAB_DIRECT);
14172 if (tmp != stack_pointer_rtx)
14173 emit_move_insn (stack_pointer_rtx, tmp);
14175 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14177 /* When we push an operand onto the stack, it has to be aligned at least
14178 at the function argument boundary. However, since we don't have
14179 the argument type, we can't determine the actual argument
14180 boundary. */
14181 emit_move_insn (tmp, x);
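/* Illustrative expansion (exact insns depend on MODE): pushing a DFmode
   value this way emits, roughly,
     sp = sp - 8
     (mem:DF (sp)) = x
   an explicit stack-pointer adjustment followed by a plain store, since
   no push instruction exists for the source register class.  */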
14184 /* Helper function of ix86_fixup_binary_operands to canonicalize
14185 operand order. Returns true if the operands should be swapped. */
14188 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14191 rtx dst = operands[0];
14192 rtx src1 = operands[1];
14193 rtx src2 = operands[2];
14195 /* If the operation is not commutative, we can't do anything. */
14196 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14199 /* Highest priority is that src1 should match dst. */
14200 if (rtx_equal_p (dst, src1))
14202 if (rtx_equal_p (dst, src2))
14205 /* Next highest priority is that immediate constants come second. */
14206 if (immediate_operand (src2, mode))
14208 if (immediate_operand (src1, mode))
14211 /* Lowest priority is that memory references should come second. */
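/* Worked example of these priorities: for a = b + a, dst matches src2,
   so we swap to a = a + b and the two-operand "add" can overwrite its
   first source; an immediate or memory src2 already needs no swap.  */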
14221 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14222 destination to use for the operation. If different from the true
14223 destination in operands[0], a copy operation will be required. */
14226 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14229 rtx dst = operands[0];
14230 rtx src1 = operands[1];
14231 rtx src2 = operands[2];
14233 /* Canonicalize operand order. */
14234 if (ix86_swap_binary_operands_p (code, mode, operands))
14238 /* It is invalid to swap operands of different modes. */
14239 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14246 /* Both source operands cannot be in memory. */
14247 if (MEM_P (src1) && MEM_P (src2))
14249 /* Optimization: Only read from memory once. */
14250 if (rtx_equal_p (src1, src2))
14252 src2 = force_reg (mode, src2);
14256 src2 = force_reg (mode, src2);
14259 /* If the destination is memory, and we do not have matching source
14260 operands, do things in registers. */
14261 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14262 dst = gen_reg_rtx (mode);
14264 /* Source 1 cannot be a constant. */
14265 if (CONSTANT_P (src1))
14266 src1 = force_reg (mode, src1);
14268 /* Source 1 cannot be a non-matching memory. */
14269 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14270 src1 = force_reg (mode, src1);
14272 operands[1] = src1;
14273 operands[2] = src2;
14277 /* Similarly, but assume that the destination has already been
14278 set up properly. */
14281 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14282 enum machine_mode mode, rtx operands[])
14284 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14285 gcc_assert (dst == operands[0]);
14288 /* Attempt to expand a binary operator. Make the expansion closer to the
14289 actual machine than just general_operand, which will allow 3 separate
14290 memory references (one output, two input) in a single insn. */
14293 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14296 rtx src1, src2, dst, op, clob;
14298 dst = ix86_fixup_binary_operands (code, mode, operands);
14299 src1 = operands[1];
14300 src2 = operands[2];
14302 /* Emit the instruction. */
14304 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14305 if (reload_in_progress)
14307 /* Reload doesn't know about the flags register, and doesn't know that
14308 it doesn't want to clobber it. We can only do this with PLUS. */
14309 gcc_assert (code == PLUS);
14314 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14315 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14318 /* Fix up the destination if needed. */
14319 if (dst != operands[0])
14320 emit_move_insn (operands[0], dst);
14323 /* Return TRUE or FALSE depending on whether the binary operator meets the
14324 appropriate constraints. */
14327 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14330 rtx dst = operands[0];
14331 rtx src1 = operands[1];
14332 rtx src2 = operands[2];
14334 /* Both source operands cannot be in memory. */
14335 if (MEM_P (src1) && MEM_P (src2))
14338 /* Canonicalize operand order for commutative operators. */
14339 if (ix86_swap_binary_operands_p (code, mode, operands))
14346 /* If the destination is memory, we must have a matching source operand. */
14347 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14350 /* Source 1 cannot be a constant. */
14351 if (CONSTANT_P (src1))
14354 /* Source 1 cannot be a non-matching memory. */
14355 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14361 /* Attempt to expand a unary operator. Make the expansion closer to the
14362 actual machine than just general_operand, which will allow 2 separate
14363 memory references (one output, one input) in a single insn. */
14366 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14369 int matching_memory;
14370 rtx src, dst, op, clob;
14375 /* If the destination is memory, and we do not have matching source
14376 operands, do things in registers. */
14377 matching_memory = 0;
14380 if (rtx_equal_p (dst, src))
14381 matching_memory = 1;
14383 dst = gen_reg_rtx (mode);
14386 /* When source operand is memory, destination must match. */
14387 if (MEM_P (src) && !matching_memory)
14388 src = force_reg (mode, src);
14390 /* Emit the instruction. */
14392 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14393 if (reload_in_progress || code == NOT)
14395 /* Reload doesn't know about the flags register, and doesn't know that
14396 it doesn't want to clobber it. */
14397 gcc_assert (code == NOT);
14402 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14403 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14406 /* Fix up the destination if needed. */
14407 if (dst != operands[0])
14408 emit_move_insn (operands[0], dst);
14411 #define LEA_SEARCH_THRESHOLD 12
14413 /* Search backward for a non-AGU definition of register number REGNO1
14414 or register number REGNO2 in INSN's basic block until we
14415 1. pass LEA_SEARCH_THRESHOLD instructions, or
14416 2. reach the BB boundary, or
14417 3. reach an AGU definition.
14418 Return the distance between the non-AGU definition point and INSN.
14419 If there is no definition point, return -1. */
14422 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14425 basic_block bb = BLOCK_FOR_INSN (insn);
14428 enum attr_type insn_type;
14430 if (insn != BB_HEAD (bb))
14432 rtx prev = PREV_INSN (insn);
14433 while (prev && distance < LEA_SEARCH_THRESHOLD)
14435 if (NONDEBUG_INSN_P (prev))
14438 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14439 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14440 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14441 && (regno1 == DF_REF_REGNO (*def_rec)
14442 || regno2 == DF_REF_REGNO (*def_rec)))
14444 insn_type = get_attr_type (prev);
14445 if (insn_type != TYPE_LEA)
14449 if (prev == BB_HEAD (bb))
14451 prev = PREV_INSN (prev);
14455 if (distance < LEA_SEARCH_THRESHOLD)
14459 bool simple_loop = false;
14461 FOR_EACH_EDGE (e, ei, bb->preds)
14464 simple_loop = true;
14470 rtx prev = BB_END (bb);
14473 && distance < LEA_SEARCH_THRESHOLD)
14475 if (NONDEBUG_INSN_P (prev))
14478 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14479 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14480 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14481 && (regno1 == DF_REF_REGNO (*def_rec)
14482 || regno2 == DF_REF_REGNO (*def_rec)))
14484 insn_type = get_attr_type (prev);
14485 if (insn_type != TYPE_LEA)
14489 prev = PREV_INSN (prev);
14497 /* get_attr_type may modify recog data. We want to make sure
14498 that recog data is valid for instruction INSN, on which
14499 distance_non_agu_define is called. INSN is unchanged here. */
14500 extract_insn_cached (insn);
14504 /* Return the distance between INSN and the next insn that uses
14505 register number REGNO0 in a memory address. Return -1 if no such
14506 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14509 distance_agu_use (unsigned int regno0, rtx insn)
14511 basic_block bb = BLOCK_FOR_INSN (insn);
14516 if (insn != BB_END (bb))
14518 rtx next = NEXT_INSN (insn);
14519 while (next && distance < LEA_SEARCH_THRESHOLD)
14521 if (NONDEBUG_INSN_P (next))
14525 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14526 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14527 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14528 && regno0 == DF_REF_REGNO (*use_rec))
14530 /* Return DISTANCE if OP0 is used in memory
14531 address in NEXT. */
14535 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14536 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14537 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14538 && regno0 == DF_REF_REGNO (*def_rec))
14540 /* Return -1 if OP0 is set in NEXT. */
14544 if (next == BB_END (bb))
14546 next = NEXT_INSN (next);
14550 if (distance < LEA_SEARCH_THRESHOLD)
14554 bool simple_loop = false;
14556 FOR_EACH_EDGE (e, ei, bb->succs)
14559 simple_loop = true;
14565 rtx next = BB_HEAD (bb);
14568 && distance < LEA_SEARCH_THRESHOLD)
14570 if (NONDEBUG_INSN_P (next))
14574 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14575 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14576 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14577 && regno0 == DF_REF_REGNO (*use_rec))
14579 /* Return DISTANCE if OP0 is used in memory
14580 address in NEXT. */
14584 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14585 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14586 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14587 && regno0 == DF_REF_REGNO (*def_rec))
14589 /* Return -1 if OP0 is set in NEXT. */
14594 next = NEXT_INSN (next);
14602 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
14603 there is a dilemma in choosing between LEA and ADD.
14604 Negative value: ADD is preferred over LEA
14605 Zero: neutral
14606 Positive value: LEA is preferred over ADD */
14607 #define IX86_LEA_PRIORITY 2
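/* Illustrative tradeoff behind this knob (assuming an Atom-style
   pipeline): in
     lea (%edi,%esi), %edi
     mov (%edi), %eax
   the lea result comes out of the AGU and feeds address generation
   directly, whereas an add would produce it in the ALU and pay a
   transfer penalty before the load could consume it.  */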
14609 /* Return true if it is ok to optimize an ADD operation to a LEA
14610 operation to avoid flag-register consumption. For processors
14611 like ATOM, if the destination register of the LEA holds an actual
14612 address which will be used soon, LEA is better; otherwise ADD
14613 is better. */
14616 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14617 rtx insn, rtx operands[])
14619 unsigned int regno0 = true_regnum (operands[0]);
14620 unsigned int regno1 = true_regnum (operands[1]);
14621 unsigned int regno2;
14623 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14624 return regno0 != regno1;
14626 regno2 = true_regnum (operands[2]);
14628 /* If a = b + c with a != b and a != c, we must use the LEA form. */
14629 if (regno0 != regno1 && regno0 != regno2)
14633 int dist_define, dist_use;
14634 dist_define = distance_non_agu_define (regno1, regno2, insn);
14635 if (dist_define <= 0)
14638 /* If this insn has both a backward non-AGU dependence and a forward
14639 AGU dependence, the one with the shorter distance takes effect. */
14640 dist_use = distance_agu_use (regno0, insn);
14642 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
14649 /* Return true if the destination reg of SET_BODY is the shift count of
14650 USE_BODY. */
14653 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14659 /* Retrieve destination of SET_BODY. */
14660 switch (GET_CODE (set_body))
14663 set_dest = SET_DEST (set_body);
14664 if (!set_dest || !REG_P (set_dest))
14668 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14669 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14677 /* Retrieve shift count of USE_BODY. */
14678 switch (GET_CODE (use_body))
14681 shift_rtx = XEXP (use_body, 1);
14684 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14685 if (ix86_dep_by_shift_count_body (set_body,
14686 XVECEXP (use_body, 0, i)))
14694 && (GET_CODE (shift_rtx) == ASHIFT
14695 || GET_CODE (shift_rtx) == LSHIFTRT
14696 || GET_CODE (shift_rtx) == ASHIFTRT
14697 || GET_CODE (shift_rtx) == ROTATE
14698 || GET_CODE (shift_rtx) == ROTATERT))
14700 rtx shift_count = XEXP (shift_rtx, 1);
14702 /* Return true if shift count is dest of SET_BODY. */
14703 if (REG_P (shift_count)
14704 && true_regnum (set_dest) == true_regnum (shift_count))
14711 /* Return true if the destination reg of SET_INSN is the shift count of
14712 USE_INSN. */
14715 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14717 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14718 PATTERN (use_insn));
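/* Example of the dependence this detects: if SET_INSN is
   "ecx = <something>" and USE_INSN shifts by %cl, the destination of
   the set is the use's shift count, so we return true.  */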
14721 /* Return TRUE or FALSE depending on whether the unary operator meets the
14722 appropriate constraints. */
14725 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14726 enum machine_mode mode ATTRIBUTE_UNUSED,
14727 rtx operands[2] ATTRIBUTE_UNUSED)
14729 /* If one of the operands is memory, source and destination must match. */
14730 if ((MEM_P (operands[0])
14731 || MEM_P (operands[1]))
14732 && ! rtx_equal_p (operands[0], operands[1]))
14737 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14738 are ok, keeping in mind the possible movddup alternative. */
14741 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14743 if (MEM_P (operands[0]))
14744 return rtx_equal_p (operands[0], operands[1 + high]);
14745 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14746 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14750 /* Post-reload splitter for converting an SFmode or DFmode value in an
14751 SSE register into an unsigned SImode value. */
14754 ix86_split_convert_uns_si_sse (rtx operands[])
14756 enum machine_mode vecmode;
14757 rtx value, large, zero_or_two31, input, two31, x;
14759 large = operands[1];
14760 zero_or_two31 = operands[2];
14761 input = operands[3];
14762 two31 = operands[4];
14763 vecmode = GET_MODE (large);
14764 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14766 /* Load up the value into the low element. We must ensure that the other
14767 elements are valid floats -- zero is the easiest such value. */
14770 if (vecmode == V4SFmode)
14771 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14773 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14777 input = gen_rtx_REG (vecmode, REGNO (input));
14778 emit_move_insn (value, CONST0_RTX (vecmode));
14779 if (vecmode == V4SFmode)
14780 emit_insn (gen_sse_movss (value, value, input));
14782 emit_insn (gen_sse2_movsd (value, value, input));
14785 emit_move_insn (large, two31);
14786 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14788 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14789 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14791 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14792 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14794 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14795 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14797 large = gen_rtx_REG (V4SImode, REGNO (large));
14798 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14800 x = gen_rtx_REG (V4SImode, REGNO (value));
14801 if (vecmode == V4SFmode)
14802 emit_insn (gen_sse2_cvttps2dq (x, value));
14804 emit_insn (gen_sse2_cvttpd2dq (x, value));
14807 emit_insn (gen_xorv4si3 (value, value, large));
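/* Scalar sketch of the sequence above (illustrative only; VALUE stands
   for the floating-point input):
     if (VALUE >= 0x1.0p31)
       result = (unsigned int) (int) (VALUE - 0x1.0p31) ^ 0x80000000u;
     else
       result = (unsigned int) (int) VALUE;
   i.e. bias values that exceed the signed range down by 2^31, truncate,
   then add the 2^31 back in the integer domain via the xor.  */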
14810 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14811 Expects the 64-bit DImode to be supplied in a pair of integral
14812 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14813 -mfpmath=sse, !optimize_size only. */
14816 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14818 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14819 rtx int_xmm, fp_xmm;
14820 rtx biases, exponents;
14823 int_xmm = gen_reg_rtx (V4SImode);
14824 if (TARGET_INTER_UNIT_MOVES)
14825 emit_insn (gen_movdi_to_sse (int_xmm, input));
14826 else if (TARGET_SSE_SPLIT_REGS)
14828 emit_clobber (int_xmm);
14829 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14833 x = gen_reg_rtx (V2DImode);
14834 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14835 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14838 x = gen_rtx_CONST_VECTOR (V4SImode,
14839 gen_rtvec (4, GEN_INT (0x43300000UL),
14840 GEN_INT (0x45300000UL),
14841 const0_rtx, const0_rtx));
14842 exponents = validize_mem (force_const_mem (V4SImode, x));
14844 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14845 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14847 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
14848 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14849 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14850 (0x1.0p84 + double(fp_value_hi_xmm)).
14851 Note these exponents differ by 32. */
14853 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14855 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14856 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14857 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14858 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14859 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14860 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14861 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14862 biases = validize_mem (force_const_mem (V2DFmode, biases));
14863 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14865 /* Add the upper and lower DFmode values together. */
14867 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14870 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14871 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14872 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14875 ix86_expand_vector_extract (false, target, fp_xmm, 0);
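/* A scalar sketch of the same bias trick (illustrative; assumes IEEE
   doubles and a little-endian union pun, names are hypothetical):
     union { unsigned long long i; double d; } lo, hi;
     lo.i = 0x4330000000000000ULL | (x & 0xffffffffULL);   2^52 + lo32
     hi.i = 0x4530000000000000ULL | (x >> 32);             2^84 + hi32*2^32
     result = (hi.d - 0x1.0p84) + (lo.d - 0x1.0p52);
   Both subtractions are exact; only the final addition rounds.  */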
14878 /* Not used, but eases macroization of patterns. */
14880 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14881 rtx input ATTRIBUTE_UNUSED)
14883 gcc_unreachable ();
14886 /* Convert an unsigned SImode value into a DFmode. Only currently used
14887 for SSE, but applicable anywhere. */
14890 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14892 REAL_VALUE_TYPE TWO31r;
14895 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14896 NULL, 1, OPTAB_DIRECT);
14898 fp = gen_reg_rtx (DFmode);
14899 emit_insn (gen_floatsidf2 (fp, x));
14901 real_ldexp (&TWO31r, &dconst1, 31);
14902 x = const_double_from_real_value (TWO31r, DFmode);
14904 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14906 emit_move_insn (target, x);
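/* Worked equation for the sequence above: adding -2^31 wraps the
   unsigned input u into the signed range, giving x = u - 2^31 with
   -2^31 <= x < 2^31.  The signed int->double conversion is then exact,
   and adding 0x1.0p31 back reproduces u exactly, since u < 2^32 <= 2^53.  */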
14909 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14910 32-bit mode; otherwise we have a direct convert instruction. */
14913 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14915 REAL_VALUE_TYPE TWO32r;
14916 rtx fp_lo, fp_hi, x;
14918 fp_lo = gen_reg_rtx (DFmode);
14919 fp_hi = gen_reg_rtx (DFmode);
14921 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14923 real_ldexp (&TWO32r, &dconst1, 32);
14924 x = const_double_from_real_value (TWO32r, DFmode);
14925 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14927 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14929 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14932 emit_move_insn (target, x);
14935 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14936 For x86_32, -mfpmath=sse, !optimize_size only. */
14938 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14940 REAL_VALUE_TYPE ONE16r;
14941 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14943 real_ldexp (&ONE16r, &dconst1, 16);
14944 x = const_double_from_real_value (ONE16r, SFmode);
14945 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14946 NULL, 0, OPTAB_DIRECT);
14947 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14948 NULL, 0, OPTAB_DIRECT);
14949 fp_hi = gen_reg_rtx (SFmode);
14950 fp_lo = gen_reg_rtx (SFmode);
14951 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14952 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14953 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14955 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14957 if (!rtx_equal_p (target, fp_hi))
14958 emit_move_insn (target, fp_hi);
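/* Worked equation for the sequence above: u = hi16 * 2^16 + lo16 with
   both halves below 2^16, so each converts to SFmode exactly; the
   multiply by 0x1.0p16 merely shifts the exponent, leaving the final
   addition as the single rounding step.  */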
14961 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14962 then replicate the value for all elements of the vector
14963 register. */
14966 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14973 v = gen_rtvec (4, value, value, value, value);
14974 return gen_rtx_CONST_VECTOR (V4SImode, v);
14978 v = gen_rtvec (2, value, value);
14979 return gen_rtx_CONST_VECTOR (V2DImode, v);
14983 v = gen_rtvec (4, value, value, value, value);
14985 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14986 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14987 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14991 v = gen_rtvec (2, value, value);
14993 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14994 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14997 gcc_unreachable ();
15001 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15002 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15003 for an SSE register. If VECT is true, then replicate the mask for
15004 all elements of the vector register. If INVERT is true, then create
15005 a mask excluding the sign bit. */
15008 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15010 enum machine_mode vec_mode, imode;
15011 HOST_WIDE_INT hi, lo;
15016 /* Find the sign bit, sign extended to 2*HWI. */
15022 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15023 lo = 0x80000000, hi = lo < 0;
15029 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15030 if (HOST_BITS_PER_WIDE_INT >= 64)
15031 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15033 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15038 vec_mode = VOIDmode;
15039 if (HOST_BITS_PER_WIDE_INT >= 64)
15042 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15049 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15053 lo = ~lo, hi = ~hi;
15059 mask = immed_double_const (lo, hi, imode);
15061 vec = gen_rtvec (2, v, mask);
15062 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15063 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15070 gcc_unreachable ();
15074 lo = ~lo, hi = ~hi;
15076 /* Force this value into the low part of a fp vector constant. */
15077 mask = immed_double_const (lo, hi, imode);
15078 mask = gen_lowpart (mode, mask);
15080 if (vec_mode == VOIDmode)
15081 return force_reg (mode, mask);
15083 v = ix86_build_const_vector (mode, vect, mask);
15084 return force_reg (vec_mode, v);
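/* Examples of the masks built here (illustrative): for SFmode the
   element is 0x80000000, or 0x7fffffff when INVERT is true (the ABS
   case); for DFmode it is 0x8000000000000000, or its complement.  */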
15087 /* Generate code for floating point ABS or NEG. */
15090 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15093 rtx mask, set, use, clob, dst, src;
15094 bool use_sse = false;
15095 bool vector_mode = VECTOR_MODE_P (mode);
15096 enum machine_mode elt_mode = mode;
15100 elt_mode = GET_MODE_INNER (mode);
15103 else if (mode == TFmode)
15105 else if (TARGET_SSE_MATH)
15106 use_sse = SSE_FLOAT_MODE_P (mode);
15108 /* NEG and ABS performed with SSE use bitwise mask operations.
15109 Create the appropriate mask now. */
15111 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15120 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15121 set = gen_rtx_SET (VOIDmode, dst, set);
15126 set = gen_rtx_fmt_e (code, mode, src);
15127 set = gen_rtx_SET (VOIDmode, dst, set);
15130 use = gen_rtx_USE (VOIDmode, mask);
15131 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15132 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15133 gen_rtvec (3, set, use, clob)));
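/* Illustrative emitted forms under SSE math: NEG becomes an XOR with
   the sign-bit mask (e.g. "xorps MASK, %xmm0" for SFmode) and ABS an
   AND with the inverted mask (e.g. "andps MASK, %xmm0").  */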
15140 /* Expand a copysign operation. Special case operand 0 being a constant. */
15143 ix86_expand_copysign (rtx operands[])
15145 enum machine_mode mode;
15146 rtx dest, op0, op1, mask, nmask;
15148 dest = operands[0];
15152 mode = GET_MODE (dest);
15154 if (GET_CODE (op0) == CONST_DOUBLE)
15156 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15158 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15159 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15161 if (mode == SFmode || mode == DFmode)
15163 enum machine_mode vmode;
15165 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15167 if (op0 == CONST0_RTX (mode))
15168 op0 = CONST0_RTX (vmode);
15171 rtx v = ix86_build_const_vector (mode, false, op0);
15173 op0 = force_reg (vmode, v);
15176 else if (op0 != CONST0_RTX (mode))
15177 op0 = force_reg (mode, op0);
15179 mask = ix86_build_signbit_mask (mode, 0, 0);
15181 if (mode == SFmode)
15182 copysign_insn = gen_copysignsf3_const;
15183 else if (mode == DFmode)
15184 copysign_insn = gen_copysigndf3_const;
15186 copysign_insn = gen_copysigntf3_const;
15188 emit_insn (copysign_insn (dest, op0, op1, mask));
15192 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15194 nmask = ix86_build_signbit_mask (mode, 0, 1);
15195 mask = ix86_build_signbit_mask (mode, 0, 0);
15197 if (mode == SFmode)
15198 copysign_insn = gen_copysignsf3_var;
15199 else if (mode == DFmode)
15200 copysign_insn = gen_copysigndf3_var;
15202 copysign_insn = gen_copysigntf3_var;
15204 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15208 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15209 be a constant, and so has already been expanded into a vector constant. */
15212 ix86_split_copysign_const (rtx operands[])
15214 enum machine_mode mode, vmode;
15215 rtx dest, op0, mask, x;
15217 dest = operands[0];
15219 mask = operands[3];
15221 mode = GET_MODE (dest);
15222 vmode = GET_MODE (mask);
15224 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15225 x = gen_rtx_AND (vmode, dest, mask);
15226 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15228 if (op0 != CONST0_RTX (vmode))
15230 x = gen_rtx_IOR (vmode, dest, op0);
15231 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15235 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15236 so we have to do two masks. */
15239 ix86_split_copysign_var (rtx operands[])
15241 enum machine_mode mode, vmode;
15242 rtx dest, scratch, op0, op1, mask, nmask, x;
15244 dest = operands[0];
15245 scratch = operands[1];
15248 nmask = operands[4];
15249 mask = operands[5];
15251 mode = GET_MODE (dest);
15252 vmode = GET_MODE (mask);
15254 if (rtx_equal_p (op0, op1))
15256 /* Shouldn't happen often (it's useless, obviously), but when it does
15257 we'd generate incorrect code if we continue below. */
15258 emit_move_insn (dest, op0);
15262 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15264 gcc_assert (REGNO (op1) == REGNO (scratch));
15266 x = gen_rtx_AND (vmode, scratch, mask);
15267 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15270 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15271 x = gen_rtx_NOT (vmode, dest);
15272 x = gen_rtx_AND (vmode, x, op0);
15273 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15277 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15279 x = gen_rtx_AND (vmode, scratch, mask);
15281 else /* alternative 2,4 */
15283 gcc_assert (REGNO (mask) == REGNO (scratch));
15284 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15285 x = gen_rtx_AND (vmode, scratch, op1);
15287 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15289 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15291 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15292 x = gen_rtx_AND (vmode, dest, nmask);
15294 else /* alternative 3,4 */
15296 gcc_assert (REGNO (nmask) == REGNO (dest));
15298 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15299 x = gen_rtx_AND (vmode, dest, op0);
15301 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15304 x = gen_rtx_IOR (vmode, dest, scratch);
15305 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and whether that
   CC mode is at least as constrained as REQ_MODE.  */
15313 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15316 enum machine_mode set_mode;
15318 set = PATTERN (insn);
15319 if (GET_CODE (set) == PARALLEL)
15320 set = XVECEXP (set, 0, 0);
15321 gcc_assert (GET_CODE (set) == SET);
15322 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15324 set_mode = GET_MODE (SET_DEST (set));
15328 if (req_mode != CCNOmode
15329 && (req_mode != CCmode
15330 || XEXP (SET_SRC (set), 1) != const0_rtx))
15334 if (req_mode == CCGCmode)
15338 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15342 if (req_mode == CCZmode)
15353 gcc_unreachable ();
15356 return (GET_MODE (SET_SRC (set)) == set_mode);
15359 /* Generate insn patterns to do an integer compare of OPERANDS. */
15362 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15364 enum machine_mode cmpmode;
15367 cmpmode = SELECT_CC_MODE (code, op0, op1);
15368 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15370 /* This is very simple, but making the interface the same as in the
15371 FP case makes the rest of the code easier. */
15372 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15373 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15375 /* Return the test that should be put into the flags user, i.e.
15376 the bcc, scc, or cmov instruction. */
15377 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15380 /* Figure out whether to use ordered or unordered fp comparisons.
15381 Return the appropriate mode to use. */
15384 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
/* ??? In order to make all comparisons reversible, we do all comparisons
   non-trapping when compiling for IEEE.  Once gcc is able to distinguish
   between trapping and non-trapping forms of comparisons, we can make
   inequality comparisons trapping again, since that results in better
   code when using FCOM based compares.  */
15391 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15395 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15397 enum machine_mode mode = GET_MODE (op0);
15399 if (SCALAR_FLOAT_MODE_P (mode))
15401 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15402 return ix86_fp_compare_mode (code);
15407 /* Only zero flag is needed. */
15408 case EQ: /* ZF=0 */
15409 case NE: /* ZF!=0 */
15411 /* Codes needing carry flag. */
15412 case GEU: /* CF=0 */
15413 case LTU: /* CF=1 */
15414 /* Detect overflow checks. They need just the carry flag. */
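/* An illustrative instance (added for clarity, not original text):
   for unsigned "sum = a + b; if (sum < a) ...", op0 is the PLUS and
   op1 is its first operand, so the wrap-around test is just a branch
   on the carry flag left behind by the addition itself.  */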
15415 if (GET_CODE (op0) == PLUS
15416 && rtx_equal_p (op1, XEXP (op0, 0)))
15420 case GTU: /* CF=0 & ZF=0 */
15421 case LEU: /* CF=1 | ZF=1 */
15422 /* Detect overflow checks. They need just the carry flag. */
15423 if (GET_CODE (op0) == MINUS
15424 && rtx_equal_p (op1, XEXP (op0, 0)))
15428 /* Codes possibly doable only with sign flag when
15429 comparing against zero. */
15430 case GE: /* SF=OF or SF=0 */
15431 case LT: /* SF<>OF or SF=1 */
15432 if (op1 == const0_rtx)
/* For other cases the carry flag is not required.  */
/* Codes doable only with the sign flag when comparing
   against zero, but we lack a jump instruction for them,
   so we need to use relational tests against overflow,
   which therefore needs to be zero.  */
15441 case GT: /* ZF=0 & SF=OF */
15442 case LE: /* ZF=1 | SF<>OF */
15443 if (op1 == const0_rtx)
/* The strcmp pattern does (use flags), and combine may ask us for a
   proper mode.  */
15452 gcc_unreachable ();
15456 /* Return the fixed registers used for condition codes. */
15459 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return VOIDmode.  */
15470 static enum machine_mode
15471 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15476 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15479 if ((m1 == CCGCmode && m2 == CCGOCmode)
15480 || (m1 == CCGOCmode && m2 == CCGCmode))
15486 gcc_unreachable ();
/* These are only compatible with themselves, which we already
   checked above.  */
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   But never change orderedness if TARGET_IEEE_FP, returning
   UNKNOWN in that case if necessary.  */
15528 static enum rtx_code
15529 ix86_fp_swap_condition (enum rtx_code code)
15533 case GT: /* GTU - CF=0 & ZF=0 */
15534 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15535 case GE: /* GEU - CF=0 */
15536 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15537 case UNLT: /* LTU - CF=1 */
15538 return TARGET_IEEE_FP ? UNKNOWN : GT;
15539 case UNLE: /* LEU - CF=1 | ZF=1 */
15540 return TARGET_IEEE_FP ? UNKNOWN : GE;
15542 return swap_condition (code);
/* Return the cost of comparison CODE using the best strategy for
   performance.  All of the following functions use the number of
   instructions as the cost metric.  In the future this should be
   tweaked to compute bytes for optimize_size and to take into account
   the performance of various instructions on various CPUs.  */
15552 ix86_fp_comparison_cost (enum rtx_code code)
15556 /* The cost of code using bit-twiddling on %ah. */
15573 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15577 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15580 gcc_unreachable ();
15583 switch (ix86_fp_comparison_strategy (code))
15585 case IX86_FPCMP_COMI:
15586 return arith_cost > 4 ? 3 : 2;
15587 case IX86_FPCMP_SAHF:
15588 return arith_cost > 4 ? 4 : 3;
/* Return the strategy to use for a floating-point comparison.  We assume
   that fcomi is always preferable where available, since that is also true
   when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
   fnstsw+test).  */
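/* An illustrative sketch of the three strategies (instruction choice
   only; exact operands vary):

     IX86_FPCMP_COMI:   fcomi               ; sets ZF/PF/CF directly
     IX86_FPCMP_SAHF:   fnstsw %ax ; sahf   ; copies C3/C2/C0 to ZF/PF/CF
     IX86_FPCMP_ARITH:  fnstsw %ax ; test $0x45, %ah ; ...

   Only the last two clobber %ax, which is why they need a scratch.  */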
15598 enum ix86_fpcmp_strategy
15599 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15601 /* Do fcomi/sahf based test when profitable. */
15604 return IX86_FPCMP_COMI;
15606 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15607 return IX86_FPCMP_SAHF;
15609 return IX86_FPCMP_ARITH;
15612 /* Swap, force into registers, or otherwise massage the two operands
15613 to a fp comparison. The operands are updated in place; the new
15614 comparison code is returned. */
15616 static enum rtx_code
15617 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15619 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15620 rtx op0 = *pop0, op1 = *pop1;
15621 enum machine_mode op_mode = GET_MODE (op0);
15622 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
/* All of the unordered compare instructions only work on registers.
   The same is true of the fcomi compare instructions.  The XFmode
   compare instructions require registers except when comparing
   against zero or when converting operand 1 from fixed point to
   floating point.  */
15631 && (fpcmp_mode == CCFPUmode
15632 || (op_mode == XFmode
15633 && ! (standard_80387_constant_p (op0) == 1
15634 || standard_80387_constant_p (op1) == 1)
15635 && GET_CODE (op1) != FLOAT)
15636 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15638 op0 = force_reg (op_mode, op0);
15639 op1 = force_reg (op_mode, op1);
15643 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15644 things around if they appear profitable, otherwise force op0
15645 into a register. */
15647 if (standard_80387_constant_p (op0) == 0
15649 && ! (standard_80387_constant_p (op1) == 0
15652 enum rtx_code new_code = ix86_fp_swap_condition (code);
15653 if (new_code != UNKNOWN)
15656 tmp = op0, op0 = op1, op1 = tmp;
15662 op0 = force_reg (op_mode, op0);
15664 if (CONSTANT_P (op1))
15666 int tmp = standard_80387_constant_p (op1);
15668 op1 = validize_mem (force_const_mem (op_mode, op1));
15672 op1 = force_reg (op_mode, op1);
15675 op1 = force_reg (op_mode, op1);
15679 /* Try to rearrange the comparison to make it cheaper. */
15680 if (ix86_fp_comparison_cost (code)
15681 > ix86_fp_comparison_cost (swap_condition (code))
15682 && (REG_P (op1) || can_create_pseudo_p ()))
15685 tmp = op0, op0 = op1, op1 = tmp;
15686 code = swap_condition (code);
15688 op0 = force_reg (op_mode, op0);
/* Convert the comparison codes we use to represent FP comparisons to
   integer codes that will result in a proper branch.  Return UNKNOWN
   if no such code is available.  */
15701 ix86_fp_compare_code_to_integer (enum rtx_code code)
15730 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15733 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15735 enum machine_mode fpcmp_mode, intcmp_mode;
15738 fpcmp_mode = ix86_fp_compare_mode (code);
15739 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15741 /* Do fcomi/sahf based test when profitable. */
15742 switch (ix86_fp_comparison_strategy (code))
15744 case IX86_FPCMP_COMI:
15745 intcmp_mode = fpcmp_mode;
15746 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15747 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15752 case IX86_FPCMP_SAHF:
15753 intcmp_mode = fpcmp_mode;
15754 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15755 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15759 scratch = gen_reg_rtx (HImode);
15760 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15761 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15764 case IX86_FPCMP_ARITH:
15765 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15766 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15767 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15769 scratch = gen_reg_rtx (HImode);
15770 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
/* In the unordered case, we have to check C2 for NaNs, which
15773 doesn't happen to work out to anything nice combination-wise.
15774 So do some bit twiddling on the value we've got in AH to come
15775 up with an appropriate set of condition codes. */
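/* For reference (an illustrative note, not original text): after
   "fnstsw" the x87 condition bits land in %ah as C0 = 0x01,
   C2 = 0x04 and C3 = 0x40.  Hence the masks below: 0x45 tests
   C3|C2|C0 and 0x44 tests C3|C2; an unordered result (NaN) sets
   all three bits.  */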
15777 intcmp_mode = CCNOmode;
15782 if (code == GT || !TARGET_IEEE_FP)
15784 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15789 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15790 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15791 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15792 intcmp_mode = CCmode;
15798 if (code == LT && TARGET_IEEE_FP)
15800 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15801 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15802 intcmp_mode = CCmode;
15807 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15813 if (code == GE || !TARGET_IEEE_FP)
15815 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15821 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15827 if (code == LE && TARGET_IEEE_FP)
15829 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15830 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15831 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15832 intcmp_mode = CCmode;
15837 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15843 if (code == EQ && TARGET_IEEE_FP)
15845 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15846 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15847 intcmp_mode = CCmode;
15852 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15858 if (code == NE && TARGET_IEEE_FP)
15860 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15861 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15867 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15873 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15877 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15882 gcc_unreachable ();
15890 /* Return the test that should be put into the flags user, i.e.
15891 the bcc, scc, or cmov instruction. */
15892 return gen_rtx_fmt_ee (code, VOIDmode,
15893 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15898 ix86_expand_compare (enum rtx_code code)
15901 op0 = ix86_compare_op0;
15902 op1 = ix86_compare_op1;
15904 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15905 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15907 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15909 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15910 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15913 ret = ix86_expand_int_compare (code, op0, op1);
15919 ix86_expand_branch (enum rtx_code code, rtx label)
15923 switch (GET_MODE (ix86_compare_op0))
15932 tmp = ix86_expand_compare (code);
15933 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15934 gen_rtx_LABEL_REF (VOIDmode, label),
15936 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15943 /* Expand DImode branch into multiple compare+branch. */
15945 rtx lo[2], hi[2], label2;
15946 enum rtx_code code1, code2, code3;
15947 enum machine_mode submode;
15949 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15951 tmp = ix86_compare_op0;
15952 ix86_compare_op0 = ix86_compare_op1;
15953 ix86_compare_op1 = tmp;
15954 code = swap_condition (code);
15956 if (GET_MODE (ix86_compare_op0) == DImode)
15958 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15959 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15964 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15965 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15969 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15970 avoid two branches. This costs one extra insn, so disable when
15971 optimizing for size. */
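/* An illustrative expansion (not from the original source): a DImode
   "a == b" on ia32 becomes roughly

     movl hi(a), %eax
     xorl hi(b), %eax
     movl lo(a), %edx
     xorl lo(b), %edx
     orl  %edx, %eax	; ZF set iff a == b

   followed by a single conditional jump instead of two.  */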
15973 if ((code == EQ || code == NE)
15974 && (!optimize_insn_for_size_p ()
15975 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15980 if (hi[1] != const0_rtx)
15981 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15982 NULL_RTX, 0, OPTAB_WIDEN);
15985 if (lo[1] != const0_rtx)
15986 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15987 NULL_RTX, 0, OPTAB_WIDEN);
15989 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15990 NULL_RTX, 0, OPTAB_WIDEN);
15992 ix86_compare_op0 = tmp;
15993 ix86_compare_op1 = const0_rtx;
15994 ix86_expand_branch (code, label);
/* Otherwise, if we are doing a less-than or greater-or-equal-than
   comparison, op1 is a constant, and the low word is zero, then we
   can just examine the high word.  Similarly for a low word of -1
   with less-or-equal-than or greater-than.  */
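/* E.g. (illustrative): for an unsigned DImode "a < 0x500000000", the
   low word of the constant is zero, so "cmpl $5, hi(a); jb ..." decides
   the whole comparison and the low word of A is never examined.  */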
16003 if (CONST_INT_P (hi[1]))
16006 case LT: case LTU: case GE: case GEU:
16007 if (lo[1] == const0_rtx)
16009 ix86_compare_op0 = hi[0];
16010 ix86_compare_op1 = hi[1];
16011 ix86_expand_branch (code, label);
16015 case LE: case LEU: case GT: case GTU:
16016 if (lo[1] == constm1_rtx)
16018 ix86_compare_op0 = hi[0];
16019 ix86_compare_op1 = hi[1];
16020 ix86_expand_branch (code, label);
16028 /* Otherwise, we need two or three jumps. */
16030 label2 = gen_label_rtx ();
16033 code2 = swap_condition (code);
16034 code3 = unsigned_condition (code);
16038 case LT: case GT: case LTU: case GTU:
16041 case LE: code1 = LT; code2 = GT; break;
16042 case GE: code1 = GT; code2 = LT; break;
16043 case LEU: code1 = LTU; code2 = GTU; break;
16044 case GEU: code1 = GTU; code2 = LTU; break;
16046 case EQ: code1 = UNKNOWN; code2 = NE; break;
16047 case NE: code2 = UNKNOWN; break;
16050 gcc_unreachable ();
16055 * if (hi(a) < hi(b)) goto true;
16056 * if (hi(a) > hi(b)) goto false;
16057 * if (lo(a) < lo(b)) goto true;
16061 ix86_compare_op0 = hi[0];
16062 ix86_compare_op1 = hi[1];
16064 if (code1 != UNKNOWN)
16065 ix86_expand_branch (code1, label);
16066 if (code2 != UNKNOWN)
16067 ix86_expand_branch (code2, label2);
16069 ix86_compare_op0 = lo[0];
16070 ix86_compare_op1 = lo[1];
16071 ix86_expand_branch (code3, label);
16073 if (code2 != UNKNOWN)
16074 emit_label (label2);
/* If we have already emitted a compare insn, go straight to simple.
   ix86_expand_compare won't emit anything if ix86_compare_emitted
   is non-NULL.  */
16082 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
16087 /* Split branch based on floating point condition. */
16089 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16090 rtx target1, rtx target2, rtx tmp, rtx pushed)
16095 if (target2 != pc_rtx)
16098 code = reverse_condition_maybe_unordered (code);
16103 condition = ix86_expand_fp_compare (code, op1, op2,
16106 /* Remove pushed operand from stack. */
16108 ix86_free_from_memory (GET_MODE (pushed));
16110 i = emit_jump_insn (gen_rtx_SET
16112 gen_rtx_IF_THEN_ELSE (VOIDmode,
16113 condition, target1, target2)));
16114 if (split_branch_probability >= 0)
16115 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16119 ix86_expand_setcc (enum rtx_code code, rtx dest)
16123 gcc_assert (GET_MODE (dest) == QImode);
16125 ret = ix86_expand_compare (code);
16126 PUT_MODE (ret, QImode);
16127 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful, and set *POP to the comparison for the operation.  */
16133 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16135 enum machine_mode mode =
16136 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
/* Do not handle double-word compares, which go through a special path.  */
16139 if (mode == (TARGET_64BIT ? TImode : DImode))
16142 if (SCALAR_FLOAT_MODE_P (mode))
16144 rtx compare_op, compare_seq;
16146 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
/* Shortcut: the following common codes never translate
   into carry flag compares.  */
16150 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16151 || code == ORDERED || code == UNORDERED)
16154 /* These comparisons require zero flag; swap operands so they won't. */
16155 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16156 && !TARGET_IEEE_FP)
16161 code = swap_condition (code);
/* Try to expand the comparison and verify that we end up with a
   carry flag based comparison.  This fails to be true only when
   we decide to expand the comparison using arithmetic, which is
   not a common scenario.  */
16169 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16170 compare_seq = get_insns ();
16173 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16174 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16175 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16177 code = GET_CODE (compare_op);
16179 if (code != LTU && code != GEU)
16182 emit_insn (compare_seq);
16187 if (!INTEGRAL_MODE_P (mode))
16196 /* Convert a==0 into (unsigned)a<1. */
16199 if (op1 != const0_rtx)
16202 code = (code == EQ ? LTU : GEU);
16205 /* Convert a>b into b<a or a>=b-1. */
16208 if (CONST_INT_P (op1))
16210 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
/* Bail out on overflow.  We could still swap the operands, but that
   would force loading the constant into a register.  */
16213 if (op1 == const0_rtx
16214 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16216 code = (code == GTU ? GEU : LTU);
16223 code = (code == GTU ? LTU : GEU);
16227 /* Convert a>=0 into (unsigned)a<0x80000000. */
16230 if (mode == DImode || op1 != const0_rtx)
16232 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16233 code = (code == LT ? GEU : LTU);
16237 if (mode == DImode || op1 != constm1_rtx)
16239 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16240 code = (code == LE ? GEU : LTU);
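/* A worked instance of the conversions above (illustrative): for SImode
   "a >= 0" we get (unsigned) a < 0x80000000, which
   "cmpl $0x80000000, a" answers in the carry flag alone -- CF is set
   exactly when the sign bit of A is clear.  */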
/* Swapping operands may cause the constant to appear as the first operand.  */
16247 if (!nonimmediate_operand (op0, VOIDmode))
16249 if (!can_create_pseudo_p ())
16251 op0 = force_reg (mode, op0);
16253 ix86_compare_op0 = op0;
16254 ix86_compare_op1 = op1;
16255 *pop = ix86_expand_compare (code);
16256 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16261 ix86_expand_int_movcc (rtx operands[])
16263 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16264 rtx compare_seq, compare_op;
16265 enum machine_mode mode = GET_MODE (operands[0]);
16266 bool sign_bit_compare_p = false;
16269 ix86_compare_op0 = XEXP (operands[1], 0);
16270 ix86_compare_op1 = XEXP (operands[1], 1);
16271 compare_op = ix86_expand_compare (code);
16272 compare_seq = get_insns ();
16275 compare_code = GET_CODE (compare_op);
16277 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
16278 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
16279 sign_bit_compare_p = true;
16281 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16282 HImode insns, we'd be swallowed in word prefix ops. */
16284 if ((mode != HImode || TARGET_FAST_PREFIX)
16285 && (mode != (TARGET_64BIT ? TImode : DImode))
16286 && CONST_INT_P (operands[2])
16287 && CONST_INT_P (operands[3]))
16289 rtx out = operands[0];
16290 HOST_WIDE_INT ct = INTVAL (operands[2]);
16291 HOST_WIDE_INT cf = INTVAL (operands[3]);
16292 HOST_WIDE_INT diff;
/* Sign bit compares are better done using shifts than using sbb.  */
16297 if (sign_bit_compare_p
16298 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16299 ix86_compare_op1, &compare_op))
16301 /* Detect overlap between destination and compare sources. */
16304 if (!sign_bit_compare_p)
16307 bool fpcmp = false;
16309 compare_code = GET_CODE (compare_op);
16311 flags = XEXP (compare_op, 0);
16313 if (GET_MODE (flags) == CCFPmode
16314 || GET_MODE (flags) == CCFPUmode)
16318 = ix86_fp_compare_code_to_integer (compare_code);
/* To simplify the rest of the code, restrict it to the GEU case.  */
16322 if (compare_code == LTU)
16324 HOST_WIDE_INT tmp = ct;
16327 compare_code = reverse_condition (compare_code);
16328 code = reverse_condition (code);
16333 PUT_CODE (compare_op,
16334 reverse_condition_maybe_unordered
16335 (GET_CODE (compare_op)));
16337 PUT_CODE (compare_op,
16338 reverse_condition (GET_CODE (compare_op)));
16342 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
16343 || reg_overlap_mentioned_p (out, ix86_compare_op1))
16344 tmp = gen_reg_rtx (mode);
16346 if (mode == DImode)
16347 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16349 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16350 flags, compare_op));
16354 if (code == GT || code == GE)
16355 code = reverse_condition (code);
16358 HOST_WIDE_INT tmp = ct;
16363 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
16364 ix86_compare_op1, VOIDmode, 0, -1);
16377 tmp = expand_simple_binop (mode, PLUS,
16379 copy_rtx (tmp), 1, OPTAB_DIRECT);
16390 tmp = expand_simple_binop (mode, IOR,
16392 copy_rtx (tmp), 1, OPTAB_DIRECT);
16394 else if (diff == -1 && ct)
16404 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16406 tmp = expand_simple_binop (mode, PLUS,
16407 copy_rtx (tmp), GEN_INT (cf),
16408 copy_rtx (tmp), 1, OPTAB_DIRECT);
16416 * andl cf - ct, dest
16426 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16429 tmp = expand_simple_binop (mode, AND,
16431 gen_int_mode (cf - ct, mode),
16432 copy_rtx (tmp), 1, OPTAB_DIRECT);
16434 tmp = expand_simple_binop (mode, PLUS,
16435 copy_rtx (tmp), GEN_INT (ct),
16436 copy_rtx (tmp), 1, OPTAB_DIRECT);
16439 if (!rtx_equal_p (tmp, out))
16440 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16442 return 1; /* DONE */
16447 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16450 tmp = ct, ct = cf, cf = tmp;
16453 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16455 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
/* We may be reversing an unordered compare to a normal compare, which
   is not valid in general (we may convert a non-trapping condition
   into a trapping one); however, on i386 we currently emit all
   comparisons unordered.  */
16461 compare_code = reverse_condition_maybe_unordered (compare_code);
16462 code = reverse_condition_maybe_unordered (code);
16466 compare_code = reverse_condition (compare_code);
16467 code = reverse_condition (code);
16471 compare_code = UNKNOWN;
16472 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
16473 && CONST_INT_P (ix86_compare_op1))
16475 if (ix86_compare_op1 == const0_rtx
16476 && (code == LT || code == GE))
16477 compare_code = code;
16478 else if (ix86_compare_op1 == constm1_rtx)
16482 else if (code == GT)
16487 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16488 if (compare_code != UNKNOWN
16489 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
16490 && (cf == -1 || ct == -1))
/* If the lea code below could be used, only optimize
   if it results in a 2 insn sequence.  */
16495 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16496 || diff == 3 || diff == 5 || diff == 9)
16497 || (compare_code == LT && ct == -1)
16498 || (compare_code == GE && cf == -1))
16501 * notl op1 (if necessary)
16509 code = reverse_condition (code);
16512 out = emit_store_flag (out, code, ix86_compare_op0,
16513 ix86_compare_op1, VOIDmode, 0, -1);
16515 out = expand_simple_binop (mode, IOR,
16517 out, 1, OPTAB_DIRECT);
16518 if (out != operands[0])
16519 emit_move_insn (operands[0], out);
16521 return 1; /* DONE */
16526 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16527 || diff == 3 || diff == 5 || diff == 9)
16528 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16530 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16536 * lea cf(dest*(ct-cf)),dest
16540 * This also catches the degenerate setcc-only case.
16546 out = emit_store_flag (out, code, ix86_compare_op0,
16547 ix86_compare_op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
16553 tmp = copy_rtx (out);
16557 out1 = copy_rtx (out);
16558 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16562 tmp = gen_rtx_PLUS (mode, tmp, out1);
16568 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16571 if (!rtx_equal_p (tmp, out))
16574 out = force_operand (tmp, copy_rtx (out));
16576 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16578 if (!rtx_equal_p (out, operands[0]))
16579 emit_move_insn (operands[0], copy_rtx (out));
16581 return 1; /* DONE */
16585 * General case: Jumpful:
16586 * xorl dest,dest cmpl op1, op2
16587 * cmpl op1, op2 movl ct, dest
16588 * setcc dest jcc 1f
16589 * decl dest movl cf, dest
16590 * andl (cf-ct),dest 1:
16593 * Size 20. Size 14.
16595 * This is reasonably steep, but branch mispredict costs are
* high on modern CPUs, so consider failing only if optimizing
* for space.
*/
16600 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16601 && BRANCH_COST (optimize_insn_for_speed_p (),
16606 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16611 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16613 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16615 /* We may be reversing unordered compare to normal compare,
16616 that is not valid in general (we may convert non-trapping
16617 condition to trapping one), however on i386 we currently
16618 emit all comparisons unordered. */
16619 code = reverse_condition_maybe_unordered (code);
16623 code = reverse_condition (code);
16624 if (compare_code != UNKNOWN)
16625 compare_code = reverse_condition (compare_code);
16629 if (compare_code != UNKNOWN)
16631 /* notl op1 (if needed)
16636 For x < 0 (resp. x <= -1) there will be no notl,
so if possible swap the constants to get rid of the
complement.
16639 True/false will be -1/0 while code below (store flag
16640 followed by decrement) is 0/-1, so the constants need
16641 to be exchanged once more. */
16643 if (compare_code == GE || !cf)
16645 code = reverse_condition (code);
16650 HOST_WIDE_INT tmp = cf;
16655 out = emit_store_flag (out, code, ix86_compare_op0,
16656 ix86_compare_op1, VOIDmode, 0, -1);
16660 out = emit_store_flag (out, code, ix86_compare_op0,
16661 ix86_compare_op1, VOIDmode, 0, 1);
16663 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
16664 copy_rtx (out), 1, OPTAB_DIRECT);
16667 out = expand_simple_binop (mode, AND, copy_rtx (out),
16668 gen_int_mode (cf - ct, mode),
16669 copy_rtx (out), 1, OPTAB_DIRECT);
16671 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16672 copy_rtx (out), 1, OPTAB_DIRECT);
16673 if (!rtx_equal_p (out, operands[0]))
16674 emit_move_insn (operands[0], copy_rtx (out));
16676 return 1; /* DONE */
16680 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
/* Try a few more things with specific constants and a variable.  */
16685 rtx var, orig_out, out, tmp;
16687 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16688 return 0; /* FAIL */
16690 /* If one of the two operands is an interesting constant, load a
16691 constant with the above and mask it in with a logical operation. */
16693 if (CONST_INT_P (operands[2]))
16696 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16697 operands[3] = constm1_rtx, op = and_optab;
16698 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16699 operands[3] = const0_rtx, op = ior_optab;
16701 return 0; /* FAIL */
16703 else if (CONST_INT_P (operands[3]))
16706 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16707 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16709 operands[2] = const0_rtx, op = ior_optab;
16711 return 0; /* FAIL */
16714 return 0; /* FAIL */
16716 orig_out = operands[0];
16717 tmp = gen_reg_rtx (mode);
16720 /* Recurse to get the constant loaded. */
16721 if (ix86_expand_int_movcc (operands) == 0)
16722 return 0; /* FAIL */
16724 /* Mask in the interesting variable. */
16725 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16727 if (!rtx_equal_p (out, orig_out))
16728 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16730 return 1; /* DONE */
16734 * For comparison with above,
16744 if (! nonimmediate_operand (operands[2], mode))
16745 operands[2] = force_reg (mode, operands[2]);
16746 if (! nonimmediate_operand (operands[3], mode))
16747 operands[3] = force_reg (mode, operands[3]);
16749 if (! register_operand (operands[2], VOIDmode)
16751 || ! register_operand (operands[3], VOIDmode)))
16752 operands[2] = force_reg (mode, operands[2]);
16755 && ! register_operand (operands[3], VOIDmode))
16756 operands[3] = force_reg (mode, operands[3]);
16758 emit_insn (compare_seq);
16759 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16760 gen_rtx_IF_THEN_ELSE (mode,
16761 compare_op, operands[2],
16764 return 1; /* DONE */
16767 /* Swap, force into registers, or otherwise massage the two operands
16768 to an sse comparison with a mask result. Thus we differ a bit from
16769 ix86_prepare_fp_compare_args which expects to produce a flags result.
16771 The DEST operand exists to help determine whether to commute commutative
16772 operators. The POP0/POP1 operands are updated in place. The new
16773 comparison code is returned, or UNKNOWN if not implementable. */
16775 static enum rtx_code
16776 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16777 rtx *pop0, rtx *pop1)
16785 /* We have no LTGT as an operator. We could implement it with
16786 NE & ORDERED, but this requires an extra temporary. It's
16787 not clear that it's worth it. */
16794 /* These are supported directly. */
16801 /* For commutative operators, try to canonicalize the destination
16802 operand to be first in the comparison - this helps reload to
16803 avoid extra moves. */
16804 if (!dest || !rtx_equal_p (dest, *pop1))
16812 /* These are not supported directly. Swap the comparison operands
16813 to transform into something that is supported. */
16817 code = swap_condition (code);
16821 gcc_unreachable ();
16827 /* Detect conditional moves that exactly match min/max operational
16828 semantics. Note that this is IEEE safe, as long as we don't
16829 interchange the operands.
16831 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16832 and TRUE if the operation is successful and instructions are emitted. */
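/* E.g. (an illustrative sketch for scalar SFmode): "x = a < b ? a : b"
   matches MIN and can be emitted as a single minss.  Operand order
   matters for IEEE: minss returns its second source operand when the
   inputs are unordered (NaN) or compare equal (e.g. -0.0 and +0.0),
   which is why the operands must never be interchanged here.  */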
16835 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16836 rtx cmp_op1, rtx if_true, rtx if_false)
16838 enum machine_mode mode;
16844 else if (code == UNGE)
16847 if_true = if_false;
16853 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16855 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16860 mode = GET_MODE (dest);
16862 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16863 but MODE may be a vector mode and thus not appropriate. */
16864 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16866 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16869 if_true = force_reg (mode, if_true);
16870 v = gen_rtvec (2, if_true, if_false);
16871 tmp = gen_rtx_UNSPEC (mode, v, u);
16875 code = is_min ? SMIN : SMAX;
16876 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16879 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16883 /* Expand an sse vector comparison. Return the register with the result. */
16886 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16887 rtx op_true, rtx op_false)
16889 enum machine_mode mode = GET_MODE (dest);
16892 cmp_op0 = force_reg (mode, cmp_op0);
16893 if (!nonimmediate_operand (cmp_op1, mode))
16894 cmp_op1 = force_reg (mode, cmp_op1);
16897 || reg_overlap_mentioned_p (dest, op_true)
16898 || reg_overlap_mentioned_p (dest, op_false))
16899 dest = gen_reg_rtx (mode);
16901 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16902 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16907 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16908 operations. This is used for both scalar and vector conditional moves. */
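/* The identity used below (stated for clarity): CMP is a per-element
   mask of all-ones or all-zeros, so

     dest = (cmp & op_true) | (~cmp & op_false)

   which collapses to a single AND or ANDN when one of the arms is
   zero, as in the first two cases.  */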
16911 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16913 enum machine_mode mode = GET_MODE (dest);
16916 if (op_false == CONST0_RTX (mode))
16918 op_true = force_reg (mode, op_true);
16919 x = gen_rtx_AND (mode, cmp, op_true);
16920 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16922 else if (op_true == CONST0_RTX (mode))
16924 op_false = force_reg (mode, op_false);
16925 x = gen_rtx_NOT (mode, cmp);
16926 x = gen_rtx_AND (mode, x, op_false);
16927 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16929 else if (TARGET_XOP)
16931 rtx pcmov = gen_rtx_SET (mode, dest,
16932 gen_rtx_IF_THEN_ELSE (mode, cmp,
16939 op_true = force_reg (mode, op_true);
16940 op_false = force_reg (mode, op_false);
16942 t2 = gen_reg_rtx (mode);
16944 t3 = gen_reg_rtx (mode);
16948 x = gen_rtx_AND (mode, op_true, cmp);
16949 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16951 x = gen_rtx_NOT (mode, cmp);
16952 x = gen_rtx_AND (mode, x, op_false);
16953 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16955 x = gen_rtx_IOR (mode, t3, t2);
16956 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16960 /* Expand a floating-point conditional move. Return true if successful. */
16963 ix86_expand_fp_movcc (rtx operands[])
16965 enum machine_mode mode = GET_MODE (operands[0]);
16966 enum rtx_code code = GET_CODE (operands[1]);
16967 rtx tmp, compare_op;
16969 ix86_compare_op0 = XEXP (operands[1], 0);
16970 ix86_compare_op1 = XEXP (operands[1], 1);
16971 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16973 enum machine_mode cmode;
16975 /* Since we've no cmove for sse registers, don't force bad register
16976 allocation just to gain access to it. Deny movcc when the
16977 comparison mode doesn't match the move mode. */
16978 cmode = GET_MODE (ix86_compare_op0);
16979 if (cmode == VOIDmode)
16980 cmode = GET_MODE (ix86_compare_op1);
16984 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16986 &ix86_compare_op1);
16987 if (code == UNKNOWN)
16990 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16991 ix86_compare_op1, operands[2],
16995 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16996 ix86_compare_op1, operands[2], operands[3]);
16997 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17001 /* The floating point conditional move instructions don't directly
17002 support conditions resulting from a signed integer comparison. */
17004 compare_op = ix86_expand_compare (code);
17005 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17007 tmp = gen_reg_rtx (QImode);
17008 ix86_expand_setcc (code, tmp);
17010 ix86_compare_op0 = tmp;
17011 ix86_compare_op1 = const0_rtx;
17012 compare_op = ix86_expand_compare (code);
17015 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17016 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17017 operands[2], operands[3])));
17022 /* Expand a floating-point vector conditional move; a vcond operation
17023 rather than a movcc operation. */
17026 ix86_expand_fp_vcond (rtx operands[])
17028 enum rtx_code code = GET_CODE (operands[3]);
17031 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17032 &operands[4], &operands[5]);
17033 if (code == UNKNOWN)
17036 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17037 operands[5], operands[1], operands[2]))
17040 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17041 operands[1], operands[2]);
17042 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17046 /* Expand a signed/unsigned integral vector conditional move. */
17049 ix86_expand_int_vcond (rtx operands[])
17051 enum machine_mode mode = GET_MODE (operands[0]);
17052 enum rtx_code code = GET_CODE (operands[3]);
17053 bool negate = false;
17056 cop0 = operands[4];
17057 cop1 = operands[5];
17059 /* XOP supports all of the comparisons on all vector int types. */
17062 /* Canonicalize the comparison to EQ, GT, GTU. */
17073 code = reverse_condition (code);
17079 code = reverse_condition (code);
17085 code = swap_condition (code);
17086 x = cop0, cop0 = cop1, cop1 = x;
17090 gcc_unreachable ();
17093 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17094 if (mode == V2DImode)
17099 /* SSE4.1 supports EQ. */
17100 if (!TARGET_SSE4_1)
17106 /* SSE4.2 supports GT/GTU. */
17107 if (!TARGET_SSE4_2)
17112 gcc_unreachable ();
17116 /* Unsigned parallel compare is not supported by the hardware.
Play some tricks to turn this into a signed comparison against 0.  */
17121 cop0 = force_reg (mode, cop0);
17129 rtx (*gen_sub3) (rtx, rtx, rtx);
/* Subtract (-(INT MAX) - 1) from both operands to make them signed.  */
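/* The trick spelled out (illustrative): for N-bit elements,
   x <u y  <==>  (x - 0x80...0) <s (y - 0x80...0),
   so biasing both operands by the sign bit reduces the unsigned
   compare to the signed pcmpgt the hardware provides.  */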
17133 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17135 gen_sub3 = (mode == V4SImode
17136 ? gen_subv4si3 : gen_subv2di3);
17137 t1 = gen_reg_rtx (mode);
17138 emit_insn (gen_sub3 (t1, cop0, mask));
17140 t2 = gen_reg_rtx (mode);
17141 emit_insn (gen_sub3 (t2, cop1, mask));
17151 /* Perform a parallel unsigned saturating subtraction. */
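/* This relies on the identity (noted for clarity)
   x >u y  <==>  (x -us y) != 0,
   where -us is the unsigned saturating subtraction provided by
   psubusb/psubusw; the result is then tested against zero with the
   sense of the comparison negated.  */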
17152 x = gen_reg_rtx (mode);
17153 emit_insn (gen_rtx_SET (VOIDmode, x,
17154 gen_rtx_US_MINUS (mode, cop0, cop1)));
17157 cop1 = CONST0_RTX (mode);
17163 gcc_unreachable ();
17168 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17169 operands[1+negate], operands[2-negate]);
17171 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17172 operands[2-negate]);
17176 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17177 true if we should do zero extension, else sign extension. HIGH_P is
17178 true if we want the N/2 high elements, else the low elements. */
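/* E.g. (an illustrative sketch): sign-extending the low half of a
   V16QImode vector to V8HImode compares the source against zero
   (pcmpgtb, giving all-ones for negative bytes) to build the
   extension vector, then interleaves the two with punpcklbw; for the
   unsigned case the extension vector is simply zero.  */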
17181 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17183 enum machine_mode imode = GET_MODE (operands[1]);
17184 rtx (*unpack)(rtx, rtx, rtx);
17191 unpack = gen_vec_interleave_highv16qi;
17193 unpack = gen_vec_interleave_lowv16qi;
17197 unpack = gen_vec_interleave_highv8hi;
17199 unpack = gen_vec_interleave_lowv8hi;
17203 unpack = gen_vec_interleave_highv4si;
17205 unpack = gen_vec_interleave_lowv4si;
17208 gcc_unreachable ();
17211 dest = gen_lowpart (imode, operands[0]);
17214 se = force_reg (imode, CONST0_RTX (imode));
17216 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17217 operands[1], pc_rtx, pc_rtx);
17219 emit_insn (unpack (dest, operands[1], se));
17222 /* This function performs the same task as ix86_expand_sse_unpack,
17223 but with SSE4.1 instructions. */
17226 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17228 enum machine_mode imode = GET_MODE (operands[1]);
17229 rtx (*unpack)(rtx, rtx);
17236 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17238 unpack = gen_sse4_1_extendv8qiv8hi2;
17242 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17244 unpack = gen_sse4_1_extendv4hiv4si2;
17248 unpack = gen_sse4_1_zero_extendv2siv2di2;
17250 unpack = gen_sse4_1_extendv2siv2di2;
17253 gcc_unreachable ();
17256 dest = operands[0];
17259 /* Shift higher 8 bytes to lower 8 bytes. */
17260 src = gen_reg_rtx (imode);
17261 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17262 gen_lowpart (V1TImode, operands[1]),
17268 emit_insn (unpack (dest, src));
/* Expand conditional increment or decrement using adc/sbb instructions.
17272 The default case using setcc followed by the conditional move can be
17273 done by generic code. */
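/* E.g. (illustrative, assuming unsigned operands): for
   "x = (a < b) ? x + 1 : x" this emits roughly

     cmpl b, a		; CF = (a < b)
     adcl $0, x		; x += CF

   with no setcc and no conditional move.  */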
17275 ix86_expand_int_addcc (rtx operands[])
17277 enum rtx_code code = GET_CODE (operands[1]);
17279 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17281 rtx val = const0_rtx;
17282 bool fpcmp = false;
17283 enum machine_mode mode;
17285 ix86_compare_op0 = XEXP (operands[1], 0);
17286 ix86_compare_op1 = XEXP (operands[1], 1);
17287 if (operands[3] != const1_rtx
17288 && operands[3] != constm1_rtx)
17290 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
17291 ix86_compare_op1, &compare_op))
17293 code = GET_CODE (compare_op);
17295 flags = XEXP (compare_op, 0);
17297 if (GET_MODE (flags) == CCFPmode
17298 || GET_MODE (flags) == CCFPUmode)
17301 code = ix86_fp_compare_code_to_integer (code);
17308 PUT_CODE (compare_op,
17309 reverse_condition_maybe_unordered
17310 (GET_CODE (compare_op)));
17312 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17315 mode = GET_MODE (operands[0]);
17317 /* Construct either adc or sbb insn. */
17318 if ((code == LTU) == (operands[3] == constm1_rtx))
17323 insn = gen_subqi3_carry;
17326 insn = gen_subhi3_carry;
17329 insn = gen_subsi3_carry;
17332 insn = gen_subdi3_carry;
17335 gcc_unreachable ();
17343 insn = gen_addqi3_carry;
17346 insn = gen_addhi3_carry;
17349 insn = gen_addsi3_carry;
17352 insn = gen_adddi3_carry;
17355 gcc_unreachable ();
17358 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17360 return 1; /* DONE */
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
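/* E.g. (illustrative): on ia32 a DFmode operand yields two SImode
   parts (the low and high words of the double), while XFmode yields
   three parts and TFmode four.  */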
17370 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17377 size = (GET_MODE_SIZE (mode) + 4) / 8;
17379 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17380 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
17384 if (MEM_P (operand) && MEM_READONLY_P (operand))
17386 rtx tmp = maybe_get_pool_constant (operand);
17391 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
17394 int ok = push_operand (operand, VOIDmode);
17398 operand = copy_rtx (operand);
17399 PUT_MODE (operand, Pmode);
17400 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17404 if (GET_CODE (operand) == CONST_VECTOR)
17406 enum machine_mode imode = int_mode_for_mode (mode);
17407 /* Caution: if we looked through a constant pool memory above,
17408 the operand may actually have a different mode now. That's
17409 ok, since we want to pun this all the way back to an integer. */
17410 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17411 gcc_assert (operand != NULL);
17417 if (mode == DImode)
17418 split_di (&operand, 1, &parts[0], &parts[1]);
17423 if (REG_P (operand))
17425 gcc_assert (reload_completed);
17426 for (i = 0; i < size; i++)
17427 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17429 else if (offsettable_memref_p (operand))
17431 operand = adjust_address (operand, SImode, 0);
17432 parts[0] = operand;
17433 for (i = 1; i < size; i++)
17434 parts[i] = adjust_address (operand, SImode, 4 * i);
17436 else if (GET_CODE (operand) == CONST_DOUBLE)
17441 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17445 real_to_target (l, &r, mode);
17446 parts[3] = gen_int_mode (l[3], SImode);
17447 parts[2] = gen_int_mode (l[2], SImode);
17450 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17451 parts[2] = gen_int_mode (l[2], SImode);
17454 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17457 gcc_unreachable ();
17459 parts[1] = gen_int_mode (l[1], SImode);
17460 parts[0] = gen_int_mode (l[0], SImode);
17463 gcc_unreachable ();
17468 if (mode == TImode)
17469 split_ti (&operand, 1, &parts[0], &parts[1]);
17470 if (mode == XFmode || mode == TFmode)
17472 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
17473 if (REG_P (operand))
17475 gcc_assert (reload_completed);
17476 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17477 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17479 else if (offsettable_memref_p (operand))
17481 operand = adjust_address (operand, DImode, 0);
17482 parts[0] = operand;
17483 parts[1] = adjust_address (operand, upper_mode, 8);
17485 else if (GET_CODE (operand) == CONST_DOUBLE)
17490 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17491 real_to_target (l, &r, mode);
/* Do not use a shift by 32, to avoid a warning on 32-bit systems.  */
17494 if (HOST_BITS_PER_WIDE_INT >= 64)
17497 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17498 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17501 parts[0] = immed_double_const (l[0], l[1], DImode);
17503 if (upper_mode == SImode)
17504 parts[1] = gen_int_mode (l[2], SImode);
17505 else if (HOST_BITS_PER_WIDE_INT >= 64)
17508 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17509 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17512 parts[1] = immed_double_const (l[2], l[3], DImode);
17515 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-5 receive the destination parts,
   in the correct order; operands 6-9 receive the corresponding source
   parts.  */
17528 ix86_split_long_move (rtx operands[])
17533 int collisions = 0;
17534 enum machine_mode mode = GET_MODE (operands[0]);
17535 bool collisionparts[4];
/* The DFmode expanders may ask us to move a double.
   For a 64-bit target this is a single move.  By hiding that fact
   here we simplify the i386.md splitters.  */
17540 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
17545 if (MEM_P (operands[1])
17546 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17547 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17548 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17549 if (push_operand (operands[0], VOIDmode))
17551 operands[0] = copy_rtx (operands[0]);
17552 PUT_MODE (operands[0], Pmode);
17555 operands[0] = gen_lowpart (DImode, operands[0]);
17556 operands[1] = gen_lowpart (DImode, operands[1]);
17557 emit_move_insn (operands[0], operands[1]);
/* The only non-offsettable memory we handle is a push.  */
17562 if (push_operand (operands[0], VOIDmode))
17565 gcc_assert (!MEM_P (operands[0])
17566 || offsettable_memref_p (operands[0]));
17568 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17569 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, be careful with source operands on the stack.  */
17572 if (push && MEM_P (operands[1])
17573 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17575 rtx src_base = XEXP (part[1][nparts - 1], 0);
17577 /* Compensate for the stack decrement by 4. */
17578 if (!TARGET_64BIT && nparts == 3
17579 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17580 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by each emitted push.  */
17584 for (i = 0; i < nparts; i++)
17585 part[1][i] = change_address (part[1][i],
17586 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
17591 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17595 for (i = 0; i < nparts; i++)
17598 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17599 if (collisionparts[i])
17603 /* Collision in the middle part can be handled by reordering. */
17604 if (collisions == 1 && nparts == 3 && collisionparts [1])
17606 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17607 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17609 else if (collisions == 1
17611 && (collisionparts [1] || collisionparts [2]))
17613 if (collisionparts [1])
17615 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17616 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17620 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17621 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
17627 else if (collisions > 1)
17633 base = part[0][nparts - 1];
/* Handle the case when the last part isn't valid for lea.
   This happens in 64-bit mode when storing the 12-byte XFmode.  */
17637 if (GET_MODE (base) != Pmode)
17638 base = gen_rtx_REG (Pmode, REGNO (base));
17640 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17641 part[1][0] = replace_equiv_address (part[1][0], base);
17642 for (i = 1; i < nparts; i++)
17644 tmp = plus_constant (base, UNITS_PER_WORD * i);
17645 part[1][i] = replace_equiv_address (part[1][i], tmp);
17656 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17657 emit_insn (gen_addsi3 (stack_pointer_rtx,
17658 stack_pointer_rtx, GEN_INT (-4)));
17659 emit_move_insn (part[0][2], part[1][2]);
17661 else if (nparts == 4)
17663 emit_move_insn (part[0][3], part[1][3]);
17664 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  If this is a
   register, that is OK -- we will just use the larger counterpart.  We
   also retype memory -- this comes from an attempt to avoid a REX prefix
   when moving the second half of a TFmode value.  */
17673 if (GET_MODE (part[1][1]) == SImode)
17675 switch (GET_CODE (part[1][1]))
17678 part[1][1] = adjust_address (part[1][1], DImode, 0);
17682 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17686 gcc_unreachable ();
17689 if (GET_MODE (part[1][0]) == SImode)
17690 part[1][0] = part[1][1];
17693 emit_move_insn (part[0][1], part[1][1]);
17694 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
17699 if ((REG_P (part[0][0])
17700 && REG_P (part[1][1])
17701 && (REGNO (part[0][0]) == REGNO (part[1][1])
17703 && REGNO (part[0][0]) == REGNO (part[1][2]))
17705 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17707 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17709 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17711 operands[2 + i] = part[0][j];
17712 operands[6 + i] = part[1][j];
17717 for (i = 0; i < nparts; i++)
17719 operands[2 + i] = part[0][i];
17720 operands[6 + i] = part[1][i];
17724 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17725 if (optimize_insn_for_size_p ())
17727 for (j = 0; j < nparts - 1; j++)
17728 if (CONST_INT_P (operands[6 + j])
17729 && operands[6 + j] != const0_rtx
17730 && REG_P (operands[2 + j]))
17731 for (i = j; i < nparts - 1; i++)
17732 if (CONST_INT_P (operands[7 + i])
17733 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17734 operands[7 + i] = operands[2 + j];
17737 for (i = 0; i < nparts; i++)
17738 emit_move_insn (operands[2 + i], operands[6 + i]);
17743 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17744 left shift by a constant, either using a single shift or
17745 a sequence of add instructions. */
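/* E.g. (illustrative): with a cheap add, "x <<= 2" may be emitted as

     addl x, x		; x <<= 1
     addl x, x		; x <<= 1

   instead of "shll $2, x", whenever count * add cost does not exceed
   the constant-shift cost and we are not optimizing for size; each
   add simply doubles the value.  */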
17748 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17752 emit_insn ((mode == DImode
17754 : gen_adddi3) (operand, operand, operand));
17756 else if (!optimize_insn_for_size_p ()
17757 && count * ix86_cost->add <= ix86_cost->shift_const)
for (i = 0; i < count; i++)
17762 emit_insn ((mode == DImode
17764 : gen_adddi3) (operand, operand, operand));
17768 emit_insn ((mode == DImode
17770 : gen_ashldi3) (operand, operand, GEN_INT (count)));
17774 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17776 rtx low[2], high[2];
17778 const int single_width = mode == DImode ? 32 : 64;
17780 if (CONST_INT_P (operands[2]))
17782 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17783 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17785 if (count >= single_width)
17787 emit_move_insn (high[0], low[1]);
17788 emit_move_insn (low[0], const0_rtx);
17790 if (count > single_width)
17791 ix86_expand_ashl_const (high[0], count - single_width, mode);
17795 if (!rtx_equal_p (operands[0], operands[1]))
17796 emit_move_insn (operands[0], operands[1]);
17797 emit_insn ((mode == DImode
17799 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17800 ix86_expand_ashl_const (low[0], count, mode);
17805 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17807 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
17811 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17813 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17815 ix86_expand_clear (low[0]);
17816 ix86_expand_clear (high[0]);
17817 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17819 d = gen_lowpart (QImode, low[0]);
17820 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17821 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17822 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17824 d = gen_lowpart (QImode, high[0]);
17825 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17826 s = gen_rtx_NE (QImode, flags, const0_rtx);
17827 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17830 /* Otherwise, we can get the same results by manually performing
17831 a bit extract operation on bit 5/6, and then performing the two
17832 shifts. The two methods of getting 0/1 into low/high are exactly
17833 the same size. Avoiding the shift in the bit extract case helps
17834 pentium4 a bit; no one else seems to care much either way. */
17839 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17840 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17842 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17843 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17845 emit_insn ((mode == DImode
17847 : gen_lshrdi3) (high[0], high[0],
17848 GEN_INT (mode == DImode ? 5 : 6)));
17849 emit_insn ((mode == DImode
17851 : gen_anddi3) (high[0], high[0], const1_rtx));
17852 emit_move_insn (low[0], high[0]);
17853 emit_insn ((mode == DImode
17855 : gen_xordi3) (low[0], low[0], const1_rtx));
17858 emit_insn ((mode == DImode
17860 : gen_ashldi3) (low[0], low[0], operands[2]));
17861 emit_insn ((mode == DImode
17863 : gen_ashldi3) (high[0], high[0], operands[2]));
  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
         know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
        emit_move_insn (high[0], low[0]);
      else
        emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
                  ? gen_x86_shld
                  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode
              ? gen_ashlsi3
              : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
                  ? gen_x86_shiftsi_adj_1
                  : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
                                            scratch));
    }
  else
    emit_insn ((mode == DImode
                ? gen_x86_shiftsi_adj_2
                : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
}
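/* Illustrative sketch (not compiler code): for a variable 64-bit left
   shift on a 32-bit target, the sequence above corresponds to

       shld high, low, cl      (high = high << cl | low >> (32 - cl))
       shl  low, cl
       if (cl & 32)            (x86 shifts only use the low 5 bits)
         { high = low; low = 0; }

   where the final fixup is done either with cmov (the _adj_1 pattern,
   using SCRATCH as a zero) or with a conditional jump (_adj_2).  */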
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
        {
          emit_move_insn (high[0], high[1]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          emit_move_insn (low[0], high[0]);
        }
      else if (count >= single_width)
        {
          emit_move_insn (low[0], high[1]);
          emit_move_insn (high[0], low[0]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0],
                                      GEN_INT (single_width - 1)));
          if (count > single_width)
            emit_insn ((mode == DImode
                        ? gen_ashrsi3
                        : gen_ashrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      ? gen_x86_shrd
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  ? gen_x86_shrd
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  ? gen_ashrsi3
                  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
        {
          emit_move_insn (scratch, high[0]);
          emit_insn ((mode == DImode
                      ? gen_ashrsi3
                      : gen_ashrdi3) (scratch, scratch,
                                      GEN_INT (single_width - 1)));
          emit_insn ((mode == DImode
                      ? gen_x86_shiftsi_adj_1
                      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
                                                scratch));
        }
      else
        emit_insn ((mode == DImode
                    ? gen_x86_shiftsi_adj_3
                    : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
    }
}
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
        {
          emit_move_insn (low[0], high[1]);
          ix86_expand_clear (high[0]);

          if (count > single_width)
            emit_insn ((mode == DImode
                        ? gen_lshrsi3
                        : gen_lshrdi3) (low[0], low[0],
                                        GEN_INT (count - single_width)));
        }
      else
        {
          if (!rtx_equal_p (operands[0], operands[1]))
            emit_move_insn (operands[0], operands[1]);
          emit_insn ((mode == DImode
                      ? gen_x86_shrd
                      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
          emit_insn ((mode == DImode
                      ? gen_lshrsi3
                      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
        }
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
        emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
                  ? gen_x86_shrd
                  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
                  ? gen_lshrsi3
                  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
        {
          ix86_expand_clear (scratch);
          emit_insn ((mode == DImode
                      ? gen_x86_shiftsi_adj_1
                      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
                                                scratch));
        }
      else
        emit_insn ((mode == DImode
                    ? gen_x86_shiftsi_adj_2
                    : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
    }
}
/* Predict the just-emitted jump instruction to be taken with
   probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}
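/* For reference: REG_BR_PROB_BASE represents probability 1, so e.g.
   predict_jump (REG_BR_PROB_BASE * 90 / 100) marks the jump as taken
   roughly 90% of the time.  */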
/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes (i.e. VARIABLE & VALUE is zero).  If so, jump
   to the returned label; EPILOGUE selects the predicted probability.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                           1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
/* Adjust COUNTER by the VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}
/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}
/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
                            GEN_INT (exact_log2 (scale)),
                            NULL, 1, OPTAB_DIRECT);
  return sc;
}
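/* Example (illustrative): for a run-time count in a register,
   scale_counter (countreg, 4) emits countreg >> 2; for a constant count
   of, say, 24 it simply returns GEN_INT (6).  SCALE is expected to be a
   power of two, hence the exact_log2.  */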
/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be in
   MODE).

   The size is rounded down to a whole multiple of the chunk size moved
   at once.  SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing
   info.  */
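/* The emitted RTL corresponds roughly to this C sketch (illustrative;
   PIECE = GET_MODE_SIZE (MODE) * UNROLL):

       size = count & ~(PIECE - 1);
       iter = 0;
       do
         {
           copy (or set) PIECE bytes at destptr + iter;
           iter += PIECE;
         }
       while (iter < size);
       destptr += size;  srcptr += size;

   plus, when the chunk is a single byte, an initial test that skips the
   loop entirely when SIZE is zero.  */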
static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
                               rtx destptr, rtx srcptr, rtx value,
                               rtx count, enum machine_mode mode, int unroll,
                               int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
                              NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
                               true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
         we can save registers by using a single temporary.
         Also using 4 temporaries is overkill in 32-bit mode.  */
      if (!TARGET_64BIT && 0)
        {
          for (i = 0; i < unroll; i++)
            {
              if (i)
                {
                  destmem =
                    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
                  srcmem =
                    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
                }
              emit_move_insn (destmem, srcmem);
            }
        }
      else
        {
          rtx tmpreg[4];
          gcc_assert (unroll <= 4);
          for (i = 0; i < unroll; i++)
            {
              tmpreg[i] = gen_reg_rtx (mode);
              if (i)
                srcmem =
                  adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (tmpreg[i], srcmem);
            }
          for (i = 0; i < unroll; i++)
            {
              if (i)
                destmem =
                  adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
              emit_move_insn (destmem, tmpreg[i]);
            }
        }
    }
  else
    for (i = 0; i < unroll; i++)
      {
        if (i)
          destmem =
            adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
        emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
        predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
        predict_jump (REG_BR_PROB_BASE - 1);
      else
        predict_jump (REG_BR_PROB_BASE
                      - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
                             true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
                                 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
        emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}
/* Output "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
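/* Illustratively, for MODE == SImode and a COUNT divisible by 4 the
   emitted insn corresponds to

       mov ecx, count / 4
       rep movsd

   with DESTEXP/SRCEXP describing the final values of the destination
   and source pointers so the rep_mov pattern can expose the pointer
   updates to the optimizers.  */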
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
                           rtx destptr, rtx srcptr,
                           rtx count,
                           enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;

  /* If the size is known, it is shorter to use rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
                               GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, count);
      set_mem_size (srcmem, count);
    }
  else
    {
      if (MEM_SIZE (destmem))
        set_mem_size (destmem, NULL_RTX);
      if (MEM_SIZE (srcmem))
        set_mem_size (srcmem, NULL_RTX);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                          destexp, srcexp));
}
/* Output "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
                            rtx count, enum machine_mode mode,
                            rtx orig_value)
{
  rtx destexp;
  rtx countreg;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
                                                       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
                                GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
                       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, count);
    }
  else if (MEM_SIZE (destmem))
    set_mem_size (destmem, NULL_RTX);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}
static void
emit_strmov (rtx destmem, rtx srcmem,
             rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}
/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
          else
            {
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
              emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
          offset += 1;
        }
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
                                   GEN_INT (max_size - 1),
                                   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
                                     count, QImode, 1, 4);
      return;
    }
  /* When there are stringops, we can cheaply increase dest and src pointers.
     Otherwise we save code size by maintaining an offset (zero is readily
     available from the preceding rep operation) and using x86 addressing
     modes.  */
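  /* Illustrative comparison of the two epilogue styles for the 2-byte
     piece (not literal compiler output):

         single stringop:   movsw                 (advances esi/edi)
         offset-based:      mov ax, [esi+ebx]
                            mov [edi+ebx], ax
                            add ebx, 2            (EBX holds the offset)

     The second form leaves the pointers unchanged and reuses the zero
     offset left over from the preceding rep operation.  */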
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          src = change_address (srcmem, HImode, srcptr);
          dest = change_address (destmem, HImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          src = change_address (srcmem, QImode, srcptr);
          dest = change_address (destmem, QImode, destptr);
          emit_insn (gen_strmov (destptr, dest, srcptr, src));
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
        {
          rtx label = ix86_expand_aligntest (count, 4, true);
          src = change_address (srcmem, SImode, srcptr);
          dest = change_address (destmem, SImode, destptr);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 2)
        {
          rtx label = ix86_expand_aligntest (count, 2, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, HImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, HImode, tmp);
          emit_move_insn (dest, src);
          tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
                                     true, OPTAB_LIB_WIDEN);
          if (tmp != offset)
            emit_move_insn (offset, tmp);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
      if (max_size > 1)
        {
          rtx label = ix86_expand_aligntest (count, 1, true);
          tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
          src = change_address (srcmem, QImode, tmp);
          tmp = gen_rtx_PLUS (Pmode, destptr, offset);
          dest = change_address (destmem, QImode, tmp);
          emit_move_insn (dest, src);
          emit_label (label);
          LABEL_NUSES (label) = 1;
        }
    }
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
                                 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
                         GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
                                 gen_lowpart (QImode, value), count, QImode,
                                 1, max_size / 2);
}
/* Output code to set at most count & (max_size - 1) bytes starting by DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            gcc_unreachable ();
          offset += 16;
        }
      if ((countval & 0x08) && max_size > 8)
        {
          if (TARGET_64BIT)
            {
              dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
            }
          else
            {
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
              emit_insn (gen_strset (destptr, dest, value));
              dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
              emit_insn (gen_strset (destptr, dest, value));
            }
          offset += 8;
        }
      if ((countval & 0x04) && max_size > 4)
        {
          dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
          offset += 4;
        }
      if ((countval & 0x02) && max_size > 2)
        {
          dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
          offset += 2;
        }
      if ((countval & 0x01) && max_size > 1)
        {
          dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
          emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
          offset += 1;
        }
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
        {
          dest = change_address (destmem, DImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
        }
      else
        {
          dest = change_address (destmem, SImode, destptr);
          emit_insn (gen_strset (destptr, dest, value));
          emit_insn (gen_strset (destptr, dest, value));
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}
/* Copy enough from SRC to DEST to align DEST, which is known to be
   aligned to ALIGN, up to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  src_size = MEM_SIZE (src);
  dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}
/* Set enough of DEST to align DEST, which is known to be aligned to
   ALIGN, up to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}
/* Set enough of DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES is how
   many bytes need to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
                                 int desired_align, int align_bytes)
{
  int off = 0;
  rtx dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
                             gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
}
/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
            int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                   \
                           || (alg != rep_prefix_1_byte        \
                               && alg != rep_prefix_4_byte     \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
                 last non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best to be copied by a libcall,
                     but we are still forced to inline, run the heuristic below
                     that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}
/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}
/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
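/* Examples: smallest_pow2_greater_than (0) == 1,
   smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (5) == 8.  Note that the result is
   strictly greater than VAL, never equal to it.  */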
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_setmem contains similar code.  The code depends upon
   architecture, block size and alignment, but always has one of the
   following overall structures:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by the epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get the destination aligned
      to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power of two sized blocks, or a byte loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue guard).  */
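/* Schematic layout of the emitted code (illustrative only):

       if (count < epilogue_size_needed) goto epilogue;          (step 1)
       copy bytes until dst is aligned to desired_align;         (step 2)
       main loop: copy size_needed bytes per iteration;          (step 3)
     epilogue:
       copy the remaining count & (epilogue_size_needed - 1) bytes.  (step 4)
*/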
int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care here
     just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size < epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }

  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
        {
          if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
            {
              emit_block_move_via_libcall (dst, src, count_exp, false);
              count_exp = const0_rtx;
              goto epilogue;
            }
        }
      else
        {
          rtx hot_label = gen_label_rtx ();
          jump_around_label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                                   LEU, 0, GET_MODE (count_exp), 1, hot_label);
          predict_jump (REG_BR_PROB_BASE * 90 / 100);
          emit_block_move_via_libcall (dst, src, count_exp, false);
          emit_jump (jump_around_label);
          emit_label (hot_label);
        }
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw the
             info away early.  */
          src = change_address (src, BLKmode, srcreg);
          dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32-bit mode, since we don't have enough
         registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
                                     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
                                     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
                                 QImode);
      break;
    }
  /* Adjust properly the offset of src and dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
                                          (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                          (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
                            epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
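/* Worked example (illustrative): promoting the QImode constant 0xAB to
   SImode proceeds as v |= v << 8 (0xABAB), then v |= v << 16
   (0xABABABAB); DImode adds one more doubling.  For a non-constant VAL
   the same effect comes from either a multiply by the promoted constant
   0x01010101 (or its DImode analogue) or the equivalent shift-and-or
   sequence, whichever the cost tables say is cheaper.  */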
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
        v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
         + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
        {
          if (mode == SImode)
            emit_insn (gen_movsi_insv_1 (reg, reg));
          else
            emit_insn (gen_movdi_insv_1 (reg, reg));
        }
      else
        {
          tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
                                     NULL, 1, OPTAB_DIRECT);
          reg =
            expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
        }
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
                                 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
                                int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}
/* Expand string set operation (memset).  Use i386 string operations when
   profitable.  See the expand_movmem comment for an explanation of the
   individual steps performed.  */
int
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
                    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
          && INTVAL (count_exp) > size_needed)
        {
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
        }
      if (align_bytes == 0)
        {
          enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
        }
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant at the
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
         promoting mode.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use the byte
         loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
        force_loopy_epilogue = true;
      if (count)
        {
          if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
            {
              /* If main algorithm works on QImode, no epilogue is needed.
                 For small sizes just don't align anything.  */
              if (size_needed == 1)
                desired_align = align;
              else
                goto epilogue;
            }
        }
      else
        {
          label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (epilogue_size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1 || expected_size <= epilogue_size_needed)
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
        }
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
                               LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
        {
          /* Except for the first move in epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw the
             info away early.  */
          dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
        }
      else
        {
          /* If we know how many bytes need to be stored before dst is
             sufficiently aligned, maintain aliasing info accurately.  */
          dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
                                                 desired_align, align_bytes);
          count_exp = plus_constant (count_exp, -align_bytes);
          count -= align_bytes;
        }
      if (need_zero_guard
          && (count < (unsigned HOST_WIDE_INT) size_needed
              || (align_bytes == 0
                  && count < ((unsigned HOST_WIDE_INT) size_needed
                              + desired_align - align))))
        {
          /* It is possible that we copied enough so the main loop will not
             execute.  */
          gcc_assert (size_needed > 1);
          if (label == NULL_RTX)
            label = gen_label_rtx ();
          emit_cmp_and_jump_insns (count_exp,
                                   GEN_INT (size_needed),
                                   LTU, 0, counter_mode (count_exp), 1, label);
          if (expected_size == -1
              || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
        }
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
                                     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
                                  QImode, val_exp);
      break;
    }
  /* Adjust properly the offset of dest memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
                                        (count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
         while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
         Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
         bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
        {
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
          if (tmp != count_exp)
            emit_move_insn (count_exp, tmp);
        }
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
 epilogue:
  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
        expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
                                         epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */
static void
ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
{
  int align;
  rtx tmp;
  rtx align_2_label = NULL_RTX;
  rtx align_3_label = NULL_RTX;
  rtx align_4_label = gen_label_rtx ();
  rtx end_0_label = gen_label_rtx ();
  rtx mem;
  rtx tmpreg = gen_reg_rtx (SImode);
  rtx scratch = gen_reg_rtx (SImode);
  rtx cmp;

  align = 0;
  if (CONST_INT_P (align_rtx))
    align = INTVAL (align_rtx);

  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */

  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
    {
      rtx scratch1 = gen_reg_rtx (Pmode);
      emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2? */
      if (align != 2)
        {
          align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
          align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */

          /* Leave just the 3 lower bits.  */
          align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
                                   Pmode, 1, align_2_label);
          emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
                                   Pmode, 1, align_3_label);
        }
      else
        {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to 4 bytes.  */

          align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
                                    NULL_RTX, 0, OPTAB_WIDEN);

          emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
                                   Pmode, 1, align_4_label);
        }

      mem = change_address (src, QImode, out);

      /* Now compare the bytes.  */

      /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
                               QImode, 1, end_0_label);

      /* Increment the address.  */
      emit_insn (ix86_gen_add3 (out, out, const1_rtx));

      /* Not needed with an alignment of 2.  */
      if (align != 2)
        {
          emit_label (align_2_label);

          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);

          emit_insn (ix86_gen_add3 (out, out, const1_rtx));

          emit_label (align_3_label);
        }

      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                               end_0_label);

      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
    }

  /* Generate the loop to check 4 bytes at a time.  It is not a good idea
     to align this loop; it only enlarges the program and does not improve
     speed.  */
  emit_label (align_4_label);

  mem = change_address (src, SImode, out);
  emit_move_insn (scratch, mem);
  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
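  /* Worked example (illustrative) with scratch = 0x12340078, whose
     second-lowest byte is zero:

         tmpreg  = scratch + 0xfefefeff   = 0x1132ff77
         scratch = ~scratch               = 0xedcbff87
         tmpreg &= scratch                = 0x0102ff07
         tmpreg &= 0x80808080             = 0x00008000  (nonzero: found it)

     A byte of the original value can only leave its 0x80 bit set here
     if that byte was zero.  */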
19860 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19861 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19862 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19863 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19864 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
			   align_4_label);
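  /* A self-contained sketch of the same zero-byte test written as plain C
     (illustrative only, not part of this file): subtracting 0x01010101
     turns a zero byte into 0xff via the borrow, ~v keeps that byte's high
     bit set, and the final mask keeps only the per-byte high bits, so the
     result is nonzero iff V contains a zero byte.

	static inline int
	contains_zero_byte (unsigned int v)
	{
	  return ((v - 0x01010101U) & ~v & 0x80808080U) != 0;
	}

     For example, v = 0x12003456 gives (0x10ff3355 & 0xedffcba9)
     & 0x80808080 = 0x00800000, flagging the zero byte.  */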
19870 rtx reg = gen_reg_rtx (SImode);
19871 rtx reg2 = gen_reg_rtx (Pmode);
19872 emit_move_insn (reg, tmpreg);
19873 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19875 /* If zero is not in the first two bytes, move two bytes forward. */
19876 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19877 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19878 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
			      gen_rtx_IF_THEN_ELSE (SImode, tmp,
						    reg,
						    tmpreg)));
      /* Emit the lea manually to avoid clobbering the flags.  */
19884 emit_insn (gen_rtx_SET (SImode, reg2,
19885 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19887 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19888 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, out,
			      gen_rtx_IF_THEN_ELSE (Pmode, tmp,
						    reg2,
						    out)));
19896 rtx end_2_label = gen_label_rtx ();
19897 /* Is zero in the first two bytes? */
19899 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19900 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19901 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
				  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
				  pc_rtx);
19905 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19906 JUMP_LABEL (tmp) = end_2_label;
19908 /* Not in the first two. Move two bytes forward. */
19909 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19910 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
19912 emit_label (end_2_label);
  /* Avoid a branch when fixing up the byte.  */
19917 tmpreg = gen_lowpart (QImode, tmpreg);
19918 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19919 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19920 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19921 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
19923 emit_label (end_0_label);
19926 /* Expand strlen. */
int
ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19931 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
19936 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19937 && !TARGET_INLINE_ALL_STRINGOPS
19938 && !optimize_insn_for_size_p ()
19939 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19942 addr = force_reg (Pmode, XEXP (src, 0));
19943 scratch1 = gen_reg_rtx (Pmode);
19945 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19946 && !optimize_insn_for_size_p ())
      /* Well it seems that some optimizer does not combine a call like
	 foo (strlen (bar), strlen (bar));
	 when the move and the subtraction are done here.  It does calculate
	 the length just once when these instructions are done inside of
	 output_strlen_unroll ().  But since &bar[strlen (bar)] is often used
	 and this uses one fewer register for the lifetime of
	 output_strlen_unroll (), this is better.  */
19956 emit_move_insn (out, addr);
19958 ix86_expand_strlensi_unroll_1 (out, src, align);
19960 /* strlensi_unroll_1 returns the address of the zero at the end of
19961 the string, like memchr(), so compute the length by subtracting
19962 the start address. */
19963 emit_insn (ix86_gen_sub3 (out, out, addr));
19969 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19970 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19973 scratch2 = gen_reg_rtx (Pmode);
19974 scratch3 = gen_reg_rtx (Pmode);
19975 scratch4 = force_reg (Pmode, constm1_rtx);
19977 emit_move_insn (scratch3, addr);
19978 eoschar = force_reg (QImode, eoschar);
19980 src = replace_equiv_address_nv (src, scratch3);
19982 /* If .md starts supporting :P, this can be done in .md. */
19983 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19984 scratch4), UNSPEC_SCAS);
19985 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19986 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
19987 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
/* For a given symbol (function), construct code to compute the address
   of its PLT entry in the large x86-64 PIC model.  */
static rtx
construct_plt_address (rtx symbol)
19997 rtx tmp = gen_reg_rtx (Pmode);
19998 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20000 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20001 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20003 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
}
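/* Illustratively (an assumption about the emitted assembly, GNU as
   syntax), the sequence built above amounts to

	movabsq	$func@PLTOFF, %<tmp>
	addq	<pic register>, %<tmp>

   i.e. the 64-bit offset of FUNC's PLT entry from the GOT base, plus the
   GOT base itself.  */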
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx callarg2,
		  rtx pop, int sibcall)
20013 rtx use = NULL, call;
  if (pop == const0_rtx)
    pop = NULL;
  gcc_assert (!TARGET_64BIT || !pop);
20019 if (TARGET_MACHO && !TARGET_64BIT)
20022 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20023 fnaddr = machopic_indirect_call_target (fnaddr);
20028 /* Static functions and indirect calls don't need the pic register. */
20029 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20030 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20031 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20032 use_reg (&use, pic_offset_table_rtx);
20035 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20037 rtx al = gen_rtx_REG (QImode, AX_REG);
20038 emit_move_insn (al, callarg2);
20039 use_reg (&use, al);
  if (ix86_cmodel == CM_LARGE_PIC
      && MEM_P (fnaddr)
      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20046 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
	   : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20051 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20052 fnaddr = gen_rtx_MEM (QImode, fnaddr);
  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
  if (pop)
    {
      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
    }
  if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI
      && (!callarg2 || INTVAL (callarg2) != -2))
      /* We need to represent that SI and DI registers are clobbered
	 by the function.  */
20070 static int clobbered_registers[] = {
20071 XMM6_REG, XMM7_REG, XMM8_REG,
20072 XMM9_REG, XMM10_REG, XMM11_REG,
20073 XMM12_REG, XMM13_REG, XMM14_REG,
20074 XMM15_REG, SI_REG, DI_REG
20077 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20078 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20079 UNSPEC_MS_TO_SYSV_CALL);
      vec[0] = call;
      vec[1] = unspec;
      for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
	vec[i + 2]
	  = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
			     ? TImode : DImode,
			     gen_rtx_REG (SSE_REGNO_P (clobbered_registers[i])
					  ? TImode : DImode,
					  clobbered_registers[i]));
20091 call = gen_rtx_PARALLEL (VOIDmode,
20092 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20096 call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
20106 static struct machine_function *
20107 ix86_init_machine_status (void)
20109 struct machine_function *f;
20111 f = ggc_alloc_cleared_machine_function ();
20112 f->use_fast_prologue_epilogue_nregs = -1;
20113 f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
}
20119 /* Return a MEM corresponding to a stack slot with mode MODE.
20120 Allocate a new slot if necessary.
20122 The RTL for a function can have several slots available: N is
20123 which slot to use. */
rtx
assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20128 struct stack_local_entry *s;
20130 gcc_assert (n < MAX_386_STACK_LOCALS);
20132 /* Virtual slot is valid only before vregs are instantiated. */
20133 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20135 for (s = ix86_stack_locals; s; s = s->next)
20136 if (s->mode == mode && s->n == n)
20137 return copy_rtx (s->rtl);
  s = ggc_alloc_stack_local_entry ();
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);

  s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
}
20149 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20151 static GTY(()) rtx ix86_tls_symbol;
static rtx
ix86_tls_get_addr (void)
20156 if (!ix86_tls_symbol)
      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
					    (TARGET_ANY_GNU_TLS
					     && !TARGET_64BIT)
					    ? "___tls_get_addr"
					    : "__tls_get_addr");
20165 return ix86_tls_symbol;
20168 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20170 static GTY(()) rtx ix86_tls_module_base_symbol;
static rtx
ix86_tls_module_base (void)
20175 if (!ix86_tls_module_base_symbol)
20177 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20178 "_TLS_MODULE_BASE_");
20179 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20180 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20183 return ix86_tls_module_base_symbol;
20186 /* Calculate the length of the memory address in the instruction
20187 encoding. Does not include the one-byte modrm, opcode, or prefix. */
int
memory_address_length (rtx addr)
20192 struct ix86_address parts;
20193 rtx base, index, disp;
20197 if (GET_CODE (addr) == PRE_DEC
20198 || GET_CODE (addr) == POST_INC
20199 || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;
  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);
20206 if (parts.base && GET_CODE (parts.base) == SUBREG)
20207 parts.base = SUBREG_REG (parts.base);
20208 if (parts.index && GET_CODE (parts.index) == SUBREG)
20209 parts.index = SUBREG_REG (parts.index);
20212 index = parts.index;
  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */
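  /* A few illustrative cases (an addition, not from the original source;
     lengths exclude the modrm byte itself, as noted above):

	(%eax)		-> 0	plain modrm
	(%esp)		-> 1	SIB byte required
	(%ebp)		-> 1	disp8 of 0 required
	16(%eax)	-> 1	disp8
	4(%eax,%ebx,2)	-> 2	SIB + disp8
	foo		-> 4	disp32  */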
20222 /* Register Indirect. */
20223 if (base && !index && !disp)
      /* esp (for its index) and ebp (for its displacement) need
	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
	 mode.  */
      if (REG_P (addr)
20229 && (addr == arg_pointer_rtx
20230 || addr == frame_pointer_rtx
20231 || REGNO (addr) == SP_REG
20232 || REGNO (addr) == BP_REG
20233 || REGNO (addr) == R12_REG
20234 || REGNO (addr) == R13_REG))
  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
20243 else if (disp && !base && !index)
20250 if (GET_CODE (disp) == CONST)
20251 symbol = XEXP (disp, 0);
20252 if (GET_CODE (symbol) == PLUS
20253 && CONST_INT_P (XEXP (symbol, 1)))
20254 symbol = XEXP (symbol, 0);
20256 if (GET_CODE (symbol) != LABEL_REF
20257 && (GET_CODE (symbol) != SYMBOL_REF
20258 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20259 && (GET_CODE (symbol) != UNSPEC
20260 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20261 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20268 /* Find the length of the displacement constant. */
20271 if (base && satisfies_constraint_K (disp))
20276 /* ebp always wants a displacement. Similarly r13. */
20277 else if (base && REG_P (base)
20278 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
      /* An index requires the two-byte modrm form....  */
      if (index
	  /* ...like esp (or r12), which always wants an index.  */
20284 || base == arg_pointer_rtx
20285 || base == frame_pointer_rtx
20286 || (base && REG_P (base)
20287 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
int
ix86_attr_length_immediate_default (rtx insn, int shortform)
20311 extract_insn_cached (insn);
20312 for (i = recog_data.n_operands - 1; i >= 0; --i)
20313 if (CONSTANT_P (recog_data.operand[i]))
20315 enum attr_mode mode = get_attr_mode (insn);
20318 if (shortform && CONST_INT_P (recog_data.operand[i]))
20320 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20327 ival = trunc_int_for_mode (ival, HImode);
20330 ival = trunc_int_for_mode (ival, SImode);
20335 if (IN_RANGE (ival, -128, 127))
	  /* Immediates for DImode instructions are encoded as 32-bit
	     sign-extended values.  */
20357 fatal_insn ("unknown insn mode", insn);
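/* For example (illustrative, not from the original source): in
   "addl $100, %eax" the constant fits a signed 8-bit immediate, so the
   short form costs 1 byte, while "addl $1000, %eax" needs the full
   32-bit immediate, i.e. 4 bytes.  A sketch of that test in plain C:

	// assumed simplification for an SImode arithmetic insn
	static int imm_len (int ival, int shortform)
	{
	  if (shortform && ival >= -128 && ival <= 127)
	    return 1;
	  return 4;
	}
   */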
/* Compute the default value for the "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx insn)
20368 if (get_attr_type (insn) == TYPE_LEA)
20370 rtx set = PATTERN (insn), addr;
20372 if (GET_CODE (set) == PARALLEL)
20373 set = XVECEXP (set, 0, 0);
20375 gcc_assert (GET_CODE (set) == SET);
20377 addr = SET_SRC (set);
20378 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20380 if (GET_CODE (addr) == ZERO_EXTEND)
20381 addr = XEXP (addr, 0);
20382 if (GET_CODE (addr) == SUBREG)
20383 addr = SUBREG_REG (addr);
20386 return memory_address_length (addr);
20389 extract_insn_cached (insn);
20390 for (i = recog_data.n_operands - 1; i >= 0; --i)
20391 if (MEM_P (recog_data.operand[i]))
20393 constrain_operands_cached (reload_completed);
20394 if (which_alternative != -1)
20396 const char *constraints = recog_data.constraints[i];
20397 int alt = which_alternative;
20399 while (*constraints == '=' || *constraints == '+')
20402 while (*constraints++ != ',')
20404 /* Skip ignored operands. */
20405 if (*constraints == 'X')
20408 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
int
ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
			      int has_vex_w)
  /* Only the 0f opcode map can use the 2-byte VEX prefix, and the VEX W
     bit requires the 3-byte VEX prefix.  */
20424 if (!has_0f_opcode || has_vex_w)
  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
20431 extract_insn_cached (insn);
20433 for (i = recog_data.n_operands - 1; i >= 0; --i)
20434 if (REG_P (recog_data.operand[i]))
	/* The REX.W bit requires the 3-byte VEX prefix.  */
20437 if (GET_MODE (recog_data.operand[i]) == DImode
20438 && GENERAL_REG_P (recog_data.operand[i]))
	/* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20444 if (MEM_P (recog_data.operand[i])
20445 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
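  /* For example (illustrative): "vaddps %xmm2, %xmm1, %xmm0" can use the
     2-byte VEX prefix, so the attribute is 2 + 1 = 3, while
     "vaddps (%r8), %xmm1, %xmm0" needs REX.B for %r8 and therefore the
     3-byte VEX prefix, giving 3 + 1 = 4.  */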
20452 /* Return the maximum number of instructions a cpu can issue. */
static int
ix86_issue_rate (void)
20459 case PROCESSOR_PENTIUM:
20460 case PROCESSOR_ATOM:
20464 case PROCESSOR_PENTIUMPRO:
20465 case PROCESSOR_PENTIUM4:
20466 case PROCESSOR_ATHLON:
20468 case PROCESSOR_AMDFAM10:
20469 case PROCESSOR_NOCONA:
20470 case PROCESSOR_GENERIC32:
20471 case PROCESSOR_GENERIC64:
20472 case PROCESSOR_BDVER1:
20475 case PROCESSOR_CORE2:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */
20487 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20491 /* Simplify the test for uninteresting insns. */
20492 if (insn_type != TYPE_SETCC
20493 && insn_type != TYPE_ICMOV
20494 && insn_type != TYPE_FCMOV
20495 && insn_type != TYPE_IBR)
20498 if ((set = single_set (dep_insn)) != 0)
20500 set = SET_DEST (set);
20503 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20504 && XVECLEN (PATTERN (dep_insn), 0) == 2
20505 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20506 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20508 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20514 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20517 /* This test is true if the dependent insn reads the flags but
20518 not any other potentially set register. */
20519 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20522 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */

static bool
ix86_agi_dependent (rtx set_insn, rtx use_insn)
20535 extract_insn_cached (use_insn);
20536 for (i = recog_data.n_operands - 1; i >= 0; --i)
20537 if (MEM_P (recog_data.operand[i]))
20539 rtx addr = XEXP (recog_data.operand[i], 0);
20540 return modified_in_p (addr, set_insn) != 0;
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20548 enum attr_type insn_type, dep_insn_type;
20549 enum attr_memory memory;
20551 int dep_insn_code_number;
20553 /* Anti and output dependencies have zero cost on all CPUs. */
20554 if (REG_NOTE_KIND (link) != 0)
20557 dep_insn_code_number = recog_memoized (dep_insn);
20559 /* If we can't recognize the insns, we can't really do anything. */
20560 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20563 insn_type = get_attr_type (insn);
20564 dep_insn_type = get_attr_type (dep_insn);
20568 case PROCESSOR_PENTIUM:
20569 /* Address Generation Interlock adds a cycle of latency. */
20570 if (insn_type == TYPE_LEA)
20572 rtx addr = PATTERN (insn);
20574 if (GET_CODE (addr) == PARALLEL)
20575 addr = XVECEXP (addr, 0, 0);
20577 gcc_assert (GET_CODE (addr) == SET);
20579 addr = SET_SRC (addr);
20580 if (modified_in_p (addr, dep_insn))
20583 else if (ix86_agi_dependent (dep_insn, insn))
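      /* Illustrative example (not from the original source): on the
	 Pentium, in

		addl	$4, %eax
		movl	(%eax), %ebx

	 the load's address generation must wait for %eax, costing an
	 extra cycle; that is the interlock being accounted for here.  */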
20586 /* ??? Compares pair with jump/setcc. */
20587 if (ix86_flags_dependent (insn, dep_insn, insn_type))
      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
20591 if (insn_type == TYPE_FMOV
20592 && get_attr_memory (insn) == MEMORY_STORE
20593 && !ix86_agi_dependent (dep_insn, insn))
20597 case PROCESSOR_PENTIUMPRO:
20598 memory = get_attr_memory (insn);
20600 /* INT->FP conversion is expensive. */
20601 if (get_attr_fp_int_src (dep_insn))
20604 /* There is one cycle extra latency between an FP op and a store. */
20605 if (insn_type == TYPE_FMOV
20606 && (set = single_set (dep_insn)) != NULL_RTX
20607 && (set2 = single_set (insn)) != NULL_RTX
20608 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20609 && MEM_P (SET_DEST (set2)))
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
20615 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20616 && !ix86_agi_dependent (dep_insn, insn))
	  /* Claim moves to take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
20620 if (dep_insn_type == TYPE_IMOV
20621 || dep_insn_type == TYPE_FMOV)
20629 memory = get_attr_memory (insn);
      /* The esp dependency is resolved before the instruction is really
	 finished.  */
20633 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20634 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20637 /* INT->FP conversion is expensive. */
20638 if (get_attr_fp_int_src (dep_insn))
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
20644 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20645 && !ix86_agi_dependent (dep_insn, insn))
	  /* Claim moves to take one cycle, as the core can issue one
	     load at a time and the next load can start a cycle later.  */
20649 if (dep_insn_type == TYPE_IMOV
20650 || dep_insn_type == TYPE_FMOV)
20659 case PROCESSOR_ATHLON:
20661 case PROCESSOR_AMDFAM10:
20662 case PROCESSOR_BDVER1:
20663 case PROCESSOR_ATOM:
20664 case PROCESSOR_GENERIC32:
20665 case PROCESSOR_GENERIC64:
20666 memory = get_attr_memory (insn);
      /* Show the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when the previous instruction is not needed to compute the
	 address.  */
20671 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20672 && !ix86_agi_dependent (dep_insn, insn))
20674 enum attr_unit unit = get_attr_unit (insn);
	  /* Because of the difference between the length of the integer
	     and floating unit pipeline preparation stages, the memory
	     operands for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
20682 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20685 loadcost = TARGET_ATHLON ? 2 : 0;
20687 if (cost >= loadcost)
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
static int
ia32_multipass_dfa_lookahead (void)
20709 case PROCESSOR_PENTIUM:
20712 case PROCESSOR_PENTIUMPRO:
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.  The value of this function is used instead of that
   alignment to align the object.  */
int
ix86_constant_alignment (tree exp, int align)
20731 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20732 || TREE_CODE (exp) == INTEGER_CST)
20734 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20736 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20739 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20740 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20741 return BITS_PER_WORD;
20746 /* Compute the alignment for a static variable.
20747 TYPE is the data type, and ALIGN is the alignment that
20748 the object would ordinarily have. The value of this function is used
20749 instead of that alignment to align the object. */
int
ix86_data_alignment (tree type, int align)
20754 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20756 if (AGGREGATE_TYPE_P (type)
20757 && TYPE_SIZE (type)
20758 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20759 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20760 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20761 && align < max_align)
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  */
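  /* For example (illustrative): a file-scope "char buf[32]" is at least
     16 bytes, so it is given 128-bit alignment here even though char
     itself only requires byte alignment.  */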
20768 if (AGGREGATE_TYPE_P (type)
20769 && TYPE_SIZE (type)
20770 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20771 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20772 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20776 if (TREE_CODE (type) == ARRAY_TYPE)
20778 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20780 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20783 else if (TREE_CODE (type) == COMPLEX_TYPE)
20786 if (TYPE_MODE (type) == DCmode && align < 64)
20788 if ((TYPE_MODE (type) == XCmode
20789 || TYPE_MODE (type) == TCmode) && align < 128)
20792 else if ((TREE_CODE (type) == RECORD_TYPE
20793 || TREE_CODE (type) == UNION_TYPE
20794 || TREE_CODE (type) == QUAL_UNION_TYPE)
20795 && TYPE_FIELDS (type))
20797 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20799 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20802 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20803 || TREE_CODE (type) == INTEGER_TYPE)
20805 if (TYPE_MODE (type) == DFmode && align < 64)
20807 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  */

unsigned int
ix86_local_alignment (tree exp, enum machine_mode mode,
20822 unsigned int align)
20826 if (exp && DECL_P (exp))
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }
20837 /* Don't do dynamic stack realignment for long long objects with
20838 -mpreferred-stack-boundary=2. */
  if (!TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
20842 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20843 && (!type || !TYPE_USER_ALIGN (type))
20844 && (!decl || !DECL_USER_ALIGN (decl)))
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
20852 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20853 align = GET_MODE_ALIGNMENT (DFmode);
  /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
     to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local
     or global array variable of length at least 16 bytes or a C99
     variable-length array variable always has alignment of at least 16
     bytes.

     This was added to allow use of aligned SSE instructions on arrays.
     The rule is meant for static storage (where the compiler cannot do
     the analysis by itself).  We follow it for automatic variables only
     when convenient.  We fully control everything in the function being
     compiled, and functions from other units cannot rely on the
     alignment.

     Exclude the va_list type.  It is the common case of a local array
     where we cannot benefit from the alignment.  */
20872 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
20875 if (AGGREGATE_TYPE_P (type)
20876 && (TYPE_MAIN_VARIANT (type)
20877 != TYPE_MAIN_VARIANT (va_list_type_node))
20878 && TYPE_SIZE (type)
20879 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20880 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20881 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20884 if (TREE_CODE (type) == ARRAY_TYPE)
20886 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20888 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20891 else if (TREE_CODE (type) == COMPLEX_TYPE)
20893 if (TYPE_MODE (type) == DCmode && align < 64)
20895 if ((TYPE_MODE (type) == XCmode
20896 || TYPE_MODE (type) == TCmode) && align < 128)
20899 else if ((TREE_CODE (type) == RECORD_TYPE
20900 || TREE_CODE (type) == UNION_TYPE
20901 || TREE_CODE (type) == QUAL_UNION_TYPE)
20902 && TYPE_FIELDS (type))
20904 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20906 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20909 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20910 || TREE_CODE (type) == INTEGER_TYPE)
20913 if (TYPE_MODE (type) == DFmode && align < 64)
20915 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20921 /* Compute the minimum required alignment for dynamic stack realignment
20922 purposes for a local variable, parameter or a stack slot. EXP is
20923 the data type or decl itself, MODE is its mode and ALIGN is the
20924 alignment that the object would ordinarily have. */
unsigned int
ix86_minimum_alignment (tree exp, enum machine_mode mode,
20928 unsigned int align)
20932 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20935 if (exp && DECL_P (exp))
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }
20946 /* Don't do dynamic stack realignment for long long objects with
20947 -mpreferred-stack-boundary=2. */
20948 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20949 && (!type || !TYPE_USER_ALIGN (type))
20950 && (!decl || !DECL_USER_ALIGN (decl)))
20956 /* Find a location for the static chain incoming to a nested function.
20957 This is a register, unless all free registers are used by arguments. */
static rtx
ix86_static_chain (const_tree fndecl, bool incoming_p)
  if (!DECL_STATIC_CHAIN (fndecl))
    return NULL;
20969 /* We always use R10 in 64-bit mode. */
20975 /* By default in 32-bit mode we use ECX to pass the static chain. */
20978 fntype = TREE_TYPE (fndecl);
20979 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20981 /* Fastcall functions use ecx/edx for arguments, which leaves
20982 us with EAX for the static chain. */
20985 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
20987 /* Thiscall functions use ecx for arguments, which leaves
20988 us with EAX for the static chain. */
20991 else if (ix86_function_regparm (fntype, fndecl) == 3)
20993 /* For regparm 3, we have no free call-clobbered registers in
20994 which to store the static chain. In order to implement this,
20995 we have the trampoline push the static chain to the stack.
20996 However, we can't push a value below the return address when
20997 we call the nested function directly, so we have to use an
20998 alternate entry point. For this we use ESI, and have the
20999 alternate entry point push ESI, so that things appear the
21000 same once we're executing the nested function. */
21003 if (fndecl == current_function_decl)
21004 ix86_static_chain_on_stack = true;
21005 return gen_frame_mem (SImode,
21006 plus_constant (arg_pointer_rtx, -8));
21012 return gen_rtx_REG (Pmode, regno);
21015 /* Emit RTL insns to initialize the variable parts of a trampoline.
21016 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21017 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21018 to be passed to the target function. */
static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21025 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21032 /* Depending on the static chain location, either load a register
21033 with a constant, or push the constant to the stack. All of the
21034 instructions are the same size. */
21035 chain = ix86_static_chain (fndecl, true);
21038 if (REGNO (chain) == CX_REG)
21040 else if (REGNO (chain) == AX_REG)
21043 gcc_unreachable ();
21048 mem = adjust_address (m_tramp, QImode, 0);
21049 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21051 mem = adjust_address (m_tramp, SImode, 1);
21052 emit_move_insn (mem, chain_value);
21054 /* Compute offset from the end of the jmp to the target function.
21055 In the case in which the trampoline stores the static chain on
21056 the stack, we need to skip the first insn which pushes the
21057 (call-saved) register static chain; this push is 1 byte. */
21058 disp = expand_binop (SImode, sub_optab, fnaddr,
21059 plus_constant (XEXP (m_tramp, 0),
21060 MEM_P (chain) ? 9 : 10),
21061 NULL_RTX, 1, OPTAB_DIRECT);
21063 mem = adjust_address (m_tramp, QImode, 5);
21064 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21066 mem = adjust_address (m_tramp, SImode, 6);
21067 emit_move_insn (mem, disp);
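      /* Byte image of the 10-byte trampoline built above (illustrative):

		b9 <chain_value:4>	movl  $chain_value, %ecx
		e9 <disp:4>		jmp   <function>

	 with b8 (movl to %eax) or 68 (pushl) in place of b9 depending on
	 where the static chain lives; in the push case the jmp targets
	 the alternate entry point one byte past the function start.  */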
      /* Load the function address into r11.  Try to load the address
	 using the shorter movl instead of movabs.  We may want to support
	 movq for kernel mode, but the kernel does not use trampolines at
	 the moment.  */
21077 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21079 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21081 mem = adjust_address (m_tramp, HImode, offset);
21082 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21084 mem = adjust_address (m_tramp, SImode, offset + 2);
21085 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21090 mem = adjust_address (m_tramp, HImode, offset);
21091 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21093 mem = adjust_address (m_tramp, DImode, offset + 2);
21094 emit_move_insn (mem, fnaddr);
21098 /* Load static chain using movabs to r10. */
21099 mem = adjust_address (m_tramp, HImode, offset);
21100 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21102 mem = adjust_address (m_tramp, DImode, offset + 2);
21103 emit_move_insn (mem, chain_value);
21106 /* Jump to r11; the last (unused) byte is a nop, only there to
21107 pad the write out to a single 32-bit store. */
21108 mem = adjust_address (m_tramp, SImode, offset);
21109 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
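      /* Byte image of the 64-bit trampoline built above (illustrative):

		41 bb <fn:4>	movl    $fn, %r11d   (fn zero-extends), or
		49 bb <fn:8>	movabsq $fn, %r11
		49 ba <chain:8>	movabsq $chain, %r10
		49 ff e3	jmpq    *%r11
		90		nop                  (pads the store)  */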
21112 gcc_assert (offset <= TRAMPOLINE_SIZE);
21115 #ifdef ENABLE_EXECUTE_STACK
21116 #ifdef CHECK_EXECUTE_STACK_ENABLED
21117 if (CHECK_EXECUTE_STACK_ENABLED)
21119 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21120 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21124 /* The following file contains several enumerations and data structures
21125 built from the definitions in i386-builtin-types.def. */
21127 #include "i386-builtin-types.inc"
21129 /* Table for the ix86 builtin non-function types. */
21130 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21132 /* Retrieve an element from the above table, building some of
21133 the types lazily. */
static tree
ix86_get_builtin_type (enum ix86_builtin_type tcode)
21138 unsigned int index;
21141 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
  type = ix86_builtin_type_tab[(int) tcode];
  if (type)
    return type;
21147 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21148 if (tcode <= IX86_BT_LAST_VECT)
21150 enum machine_mode mode;
21152 index = tcode - IX86_BT_LAST_PRIM - 1;
21153 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21154 mode = ix86_builtin_type_vect_mode[index];
21156 type = build_vector_type_for_mode (itype, mode);
21162 index = tcode - IX86_BT_LAST_VECT - 1;
21163 if (tcode <= IX86_BT_LAST_PTR)
21164 quals = TYPE_UNQUALIFIED;
21166 quals = TYPE_QUAL_CONST;
21168 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21169 if (quals != TYPE_UNQUALIFIED)
21170 itype = build_qualified_type (itype, quals);
21172 type = build_pointer_type (itype);
21175 ix86_builtin_type_tab[(int) tcode] = type;
21179 /* Table for the ix86 builtin function types. */
21180 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21182 /* Retrieve an element from the above table, building some of
21183 the types lazily. */
static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21190 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type)
    return type;
21196 if (tcode <= IX86_BT_LAST_FUNC)
21198 unsigned start = ix86_builtin_func_start[(int) tcode];
21199 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21200 tree rtype, atype, args = void_list_node;
21203 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21204 for (i = after - 1; i > start; --i)
21206 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21207 args = tree_cons (NULL, atype, args);
21210 type = build_function_type (rtype, args);
21214 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21215 enum ix86_builtin_func_type icode;
21217 icode = ix86_builtin_func_alias_base[index];
21218 type = ix86_get_builtin_func_type (icode);
21221 ix86_builtin_func_type_tab[(int) tcode] = type;
/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
21229 IX86_BUILTIN_ADDPS,
21230 IX86_BUILTIN_ADDSS,
21231 IX86_BUILTIN_DIVPS,
21232 IX86_BUILTIN_DIVSS,
21233 IX86_BUILTIN_MULPS,
21234 IX86_BUILTIN_MULSS,
21235 IX86_BUILTIN_SUBPS,
21236 IX86_BUILTIN_SUBSS,
21238 IX86_BUILTIN_CMPEQPS,
21239 IX86_BUILTIN_CMPLTPS,
21240 IX86_BUILTIN_CMPLEPS,
21241 IX86_BUILTIN_CMPGTPS,
21242 IX86_BUILTIN_CMPGEPS,
21243 IX86_BUILTIN_CMPNEQPS,
21244 IX86_BUILTIN_CMPNLTPS,
21245 IX86_BUILTIN_CMPNLEPS,
21246 IX86_BUILTIN_CMPNGTPS,
21247 IX86_BUILTIN_CMPNGEPS,
21248 IX86_BUILTIN_CMPORDPS,
21249 IX86_BUILTIN_CMPUNORDPS,
21250 IX86_BUILTIN_CMPEQSS,
21251 IX86_BUILTIN_CMPLTSS,
21252 IX86_BUILTIN_CMPLESS,
21253 IX86_BUILTIN_CMPNEQSS,
21254 IX86_BUILTIN_CMPNLTSS,
21255 IX86_BUILTIN_CMPNLESS,
21256 IX86_BUILTIN_CMPNGTSS,
21257 IX86_BUILTIN_CMPNGESS,
21258 IX86_BUILTIN_CMPORDSS,
21259 IX86_BUILTIN_CMPUNORDSS,
21261 IX86_BUILTIN_COMIEQSS,
21262 IX86_BUILTIN_COMILTSS,
21263 IX86_BUILTIN_COMILESS,
21264 IX86_BUILTIN_COMIGTSS,
21265 IX86_BUILTIN_COMIGESS,
21266 IX86_BUILTIN_COMINEQSS,
21267 IX86_BUILTIN_UCOMIEQSS,
21268 IX86_BUILTIN_UCOMILTSS,
21269 IX86_BUILTIN_UCOMILESS,
21270 IX86_BUILTIN_UCOMIGTSS,
21271 IX86_BUILTIN_UCOMIGESS,
21272 IX86_BUILTIN_UCOMINEQSS,
21274 IX86_BUILTIN_CVTPI2PS,
21275 IX86_BUILTIN_CVTPS2PI,
21276 IX86_BUILTIN_CVTSI2SS,
21277 IX86_BUILTIN_CVTSI642SS,
21278 IX86_BUILTIN_CVTSS2SI,
21279 IX86_BUILTIN_CVTSS2SI64,
21280 IX86_BUILTIN_CVTTPS2PI,
21281 IX86_BUILTIN_CVTTSS2SI,
21282 IX86_BUILTIN_CVTTSS2SI64,
21284 IX86_BUILTIN_MAXPS,
21285 IX86_BUILTIN_MAXSS,
21286 IX86_BUILTIN_MINPS,
21287 IX86_BUILTIN_MINSS,
21289 IX86_BUILTIN_LOADUPS,
21290 IX86_BUILTIN_STOREUPS,
21291 IX86_BUILTIN_MOVSS,
21293 IX86_BUILTIN_MOVHLPS,
21294 IX86_BUILTIN_MOVLHPS,
21295 IX86_BUILTIN_LOADHPS,
21296 IX86_BUILTIN_LOADLPS,
21297 IX86_BUILTIN_STOREHPS,
21298 IX86_BUILTIN_STORELPS,
21300 IX86_BUILTIN_MASKMOVQ,
21301 IX86_BUILTIN_MOVMSKPS,
21302 IX86_BUILTIN_PMOVMSKB,
21304 IX86_BUILTIN_MOVNTPS,
21305 IX86_BUILTIN_MOVNTQ,
21307 IX86_BUILTIN_LOADDQU,
21308 IX86_BUILTIN_STOREDQU,
21310 IX86_BUILTIN_PACKSSWB,
21311 IX86_BUILTIN_PACKSSDW,
21312 IX86_BUILTIN_PACKUSWB,
21314 IX86_BUILTIN_PADDB,
21315 IX86_BUILTIN_PADDW,
21316 IX86_BUILTIN_PADDD,
21317 IX86_BUILTIN_PADDQ,
21318 IX86_BUILTIN_PADDSB,
21319 IX86_BUILTIN_PADDSW,
21320 IX86_BUILTIN_PADDUSB,
21321 IX86_BUILTIN_PADDUSW,
21322 IX86_BUILTIN_PSUBB,
21323 IX86_BUILTIN_PSUBW,
21324 IX86_BUILTIN_PSUBD,
21325 IX86_BUILTIN_PSUBQ,
21326 IX86_BUILTIN_PSUBSB,
21327 IX86_BUILTIN_PSUBSW,
21328 IX86_BUILTIN_PSUBUSB,
21329 IX86_BUILTIN_PSUBUSW,
21332 IX86_BUILTIN_PANDN,
21336 IX86_BUILTIN_PAVGB,
21337 IX86_BUILTIN_PAVGW,
21339 IX86_BUILTIN_PCMPEQB,
21340 IX86_BUILTIN_PCMPEQW,
21341 IX86_BUILTIN_PCMPEQD,
21342 IX86_BUILTIN_PCMPGTB,
21343 IX86_BUILTIN_PCMPGTW,
21344 IX86_BUILTIN_PCMPGTD,
21346 IX86_BUILTIN_PMADDWD,
21348 IX86_BUILTIN_PMAXSW,
21349 IX86_BUILTIN_PMAXUB,
21350 IX86_BUILTIN_PMINSW,
21351 IX86_BUILTIN_PMINUB,
21353 IX86_BUILTIN_PMULHUW,
21354 IX86_BUILTIN_PMULHW,
21355 IX86_BUILTIN_PMULLW,
21357 IX86_BUILTIN_PSADBW,
21358 IX86_BUILTIN_PSHUFW,
21360 IX86_BUILTIN_PSLLW,
21361 IX86_BUILTIN_PSLLD,
21362 IX86_BUILTIN_PSLLQ,
21363 IX86_BUILTIN_PSRAW,
21364 IX86_BUILTIN_PSRAD,
21365 IX86_BUILTIN_PSRLW,
21366 IX86_BUILTIN_PSRLD,
21367 IX86_BUILTIN_PSRLQ,
21368 IX86_BUILTIN_PSLLWI,
21369 IX86_BUILTIN_PSLLDI,
21370 IX86_BUILTIN_PSLLQI,
21371 IX86_BUILTIN_PSRAWI,
21372 IX86_BUILTIN_PSRADI,
21373 IX86_BUILTIN_PSRLWI,
21374 IX86_BUILTIN_PSRLDI,
21375 IX86_BUILTIN_PSRLQI,
21377 IX86_BUILTIN_PUNPCKHBW,
21378 IX86_BUILTIN_PUNPCKHWD,
21379 IX86_BUILTIN_PUNPCKHDQ,
21380 IX86_BUILTIN_PUNPCKLBW,
21381 IX86_BUILTIN_PUNPCKLWD,
21382 IX86_BUILTIN_PUNPCKLDQ,
21384 IX86_BUILTIN_SHUFPS,
21386 IX86_BUILTIN_RCPPS,
21387 IX86_BUILTIN_RCPSS,
21388 IX86_BUILTIN_RSQRTPS,
21389 IX86_BUILTIN_RSQRTPS_NR,
21390 IX86_BUILTIN_RSQRTSS,
21391 IX86_BUILTIN_RSQRTF,
21392 IX86_BUILTIN_SQRTPS,
21393 IX86_BUILTIN_SQRTPS_NR,
21394 IX86_BUILTIN_SQRTSS,
21396 IX86_BUILTIN_UNPCKHPS,
21397 IX86_BUILTIN_UNPCKLPS,
21399 IX86_BUILTIN_ANDPS,
21400 IX86_BUILTIN_ANDNPS,
21402 IX86_BUILTIN_XORPS,
21405 IX86_BUILTIN_LDMXCSR,
21406 IX86_BUILTIN_STMXCSR,
21407 IX86_BUILTIN_SFENCE,
21409 /* 3DNow! Original */
21410 IX86_BUILTIN_FEMMS,
21411 IX86_BUILTIN_PAVGUSB,
21412 IX86_BUILTIN_PF2ID,
21413 IX86_BUILTIN_PFACC,
21414 IX86_BUILTIN_PFADD,
21415 IX86_BUILTIN_PFCMPEQ,
21416 IX86_BUILTIN_PFCMPGE,
21417 IX86_BUILTIN_PFCMPGT,
21418 IX86_BUILTIN_PFMAX,
21419 IX86_BUILTIN_PFMIN,
21420 IX86_BUILTIN_PFMUL,
21421 IX86_BUILTIN_PFRCP,
21422 IX86_BUILTIN_PFRCPIT1,
21423 IX86_BUILTIN_PFRCPIT2,
21424 IX86_BUILTIN_PFRSQIT1,
21425 IX86_BUILTIN_PFRSQRT,
21426 IX86_BUILTIN_PFSUB,
21427 IX86_BUILTIN_PFSUBR,
21428 IX86_BUILTIN_PI2FD,
21429 IX86_BUILTIN_PMULHRW,
21431 /* 3DNow! Athlon Extensions */
21432 IX86_BUILTIN_PF2IW,
21433 IX86_BUILTIN_PFNACC,
21434 IX86_BUILTIN_PFPNACC,
21435 IX86_BUILTIN_PI2FW,
21436 IX86_BUILTIN_PSWAPDSI,
21437 IX86_BUILTIN_PSWAPDSF,
21440 IX86_BUILTIN_ADDPD,
21441 IX86_BUILTIN_ADDSD,
21442 IX86_BUILTIN_DIVPD,
21443 IX86_BUILTIN_DIVSD,
21444 IX86_BUILTIN_MULPD,
21445 IX86_BUILTIN_MULSD,
21446 IX86_BUILTIN_SUBPD,
21447 IX86_BUILTIN_SUBSD,
21449 IX86_BUILTIN_CMPEQPD,
21450 IX86_BUILTIN_CMPLTPD,
21451 IX86_BUILTIN_CMPLEPD,
21452 IX86_BUILTIN_CMPGTPD,
21453 IX86_BUILTIN_CMPGEPD,
21454 IX86_BUILTIN_CMPNEQPD,
21455 IX86_BUILTIN_CMPNLTPD,
21456 IX86_BUILTIN_CMPNLEPD,
21457 IX86_BUILTIN_CMPNGTPD,
21458 IX86_BUILTIN_CMPNGEPD,
21459 IX86_BUILTIN_CMPORDPD,
21460 IX86_BUILTIN_CMPUNORDPD,
21461 IX86_BUILTIN_CMPEQSD,
21462 IX86_BUILTIN_CMPLTSD,
21463 IX86_BUILTIN_CMPLESD,
21464 IX86_BUILTIN_CMPNEQSD,
21465 IX86_BUILTIN_CMPNLTSD,
21466 IX86_BUILTIN_CMPNLESD,
21467 IX86_BUILTIN_CMPORDSD,
21468 IX86_BUILTIN_CMPUNORDSD,
21470 IX86_BUILTIN_COMIEQSD,
21471 IX86_BUILTIN_COMILTSD,
21472 IX86_BUILTIN_COMILESD,
21473 IX86_BUILTIN_COMIGTSD,
21474 IX86_BUILTIN_COMIGESD,
21475 IX86_BUILTIN_COMINEQSD,
21476 IX86_BUILTIN_UCOMIEQSD,
21477 IX86_BUILTIN_UCOMILTSD,
21478 IX86_BUILTIN_UCOMILESD,
21479 IX86_BUILTIN_UCOMIGTSD,
21480 IX86_BUILTIN_UCOMIGESD,
21481 IX86_BUILTIN_UCOMINEQSD,
21483 IX86_BUILTIN_MAXPD,
21484 IX86_BUILTIN_MAXSD,
21485 IX86_BUILTIN_MINPD,
21486 IX86_BUILTIN_MINSD,
21488 IX86_BUILTIN_ANDPD,
21489 IX86_BUILTIN_ANDNPD,
21491 IX86_BUILTIN_XORPD,
21493 IX86_BUILTIN_SQRTPD,
21494 IX86_BUILTIN_SQRTSD,
21496 IX86_BUILTIN_UNPCKHPD,
21497 IX86_BUILTIN_UNPCKLPD,
21499 IX86_BUILTIN_SHUFPD,
21501 IX86_BUILTIN_LOADUPD,
21502 IX86_BUILTIN_STOREUPD,
21503 IX86_BUILTIN_MOVSD,
21505 IX86_BUILTIN_LOADHPD,
21506 IX86_BUILTIN_LOADLPD,
21508 IX86_BUILTIN_CVTDQ2PD,
21509 IX86_BUILTIN_CVTDQ2PS,
21511 IX86_BUILTIN_CVTPD2DQ,
21512 IX86_BUILTIN_CVTPD2PI,
21513 IX86_BUILTIN_CVTPD2PS,
21514 IX86_BUILTIN_CVTTPD2DQ,
21515 IX86_BUILTIN_CVTTPD2PI,
21517 IX86_BUILTIN_CVTPI2PD,
21518 IX86_BUILTIN_CVTSI2SD,
21519 IX86_BUILTIN_CVTSI642SD,
21521 IX86_BUILTIN_CVTSD2SI,
21522 IX86_BUILTIN_CVTSD2SI64,
21523 IX86_BUILTIN_CVTSD2SS,
21524 IX86_BUILTIN_CVTSS2SD,
21525 IX86_BUILTIN_CVTTSD2SI,
21526 IX86_BUILTIN_CVTTSD2SI64,
21528 IX86_BUILTIN_CVTPS2DQ,
21529 IX86_BUILTIN_CVTPS2PD,
21530 IX86_BUILTIN_CVTTPS2DQ,
21532 IX86_BUILTIN_MOVNTI,
21533 IX86_BUILTIN_MOVNTPD,
21534 IX86_BUILTIN_MOVNTDQ,
21536 IX86_BUILTIN_MOVQ128,
21539 IX86_BUILTIN_MASKMOVDQU,
21540 IX86_BUILTIN_MOVMSKPD,
21541 IX86_BUILTIN_PMOVMSKB128,
21543 IX86_BUILTIN_PACKSSWB128,
21544 IX86_BUILTIN_PACKSSDW128,
21545 IX86_BUILTIN_PACKUSWB128,
21547 IX86_BUILTIN_PADDB128,
21548 IX86_BUILTIN_PADDW128,
21549 IX86_BUILTIN_PADDD128,
21550 IX86_BUILTIN_PADDQ128,
21551 IX86_BUILTIN_PADDSB128,
21552 IX86_BUILTIN_PADDSW128,
21553 IX86_BUILTIN_PADDUSB128,
21554 IX86_BUILTIN_PADDUSW128,
21555 IX86_BUILTIN_PSUBB128,
21556 IX86_BUILTIN_PSUBW128,
21557 IX86_BUILTIN_PSUBD128,
21558 IX86_BUILTIN_PSUBQ128,
21559 IX86_BUILTIN_PSUBSB128,
21560 IX86_BUILTIN_PSUBSW128,
21561 IX86_BUILTIN_PSUBUSB128,
21562 IX86_BUILTIN_PSUBUSW128,
21564 IX86_BUILTIN_PAND128,
21565 IX86_BUILTIN_PANDN128,
21566 IX86_BUILTIN_POR128,
21567 IX86_BUILTIN_PXOR128,
21569 IX86_BUILTIN_PAVGB128,
21570 IX86_BUILTIN_PAVGW128,
21572 IX86_BUILTIN_PCMPEQB128,
21573 IX86_BUILTIN_PCMPEQW128,
21574 IX86_BUILTIN_PCMPEQD128,
21575 IX86_BUILTIN_PCMPGTB128,
21576 IX86_BUILTIN_PCMPGTW128,
21577 IX86_BUILTIN_PCMPGTD128,
21579 IX86_BUILTIN_PMADDWD128,
21581 IX86_BUILTIN_PMAXSW128,
21582 IX86_BUILTIN_PMAXUB128,
21583 IX86_BUILTIN_PMINSW128,
21584 IX86_BUILTIN_PMINUB128,
21586 IX86_BUILTIN_PMULUDQ,
21587 IX86_BUILTIN_PMULUDQ128,
21588 IX86_BUILTIN_PMULHUW128,
21589 IX86_BUILTIN_PMULHW128,
21590 IX86_BUILTIN_PMULLW128,
21592 IX86_BUILTIN_PSADBW128,
21593 IX86_BUILTIN_PSHUFHW,
21594 IX86_BUILTIN_PSHUFLW,
21595 IX86_BUILTIN_PSHUFD,
21597 IX86_BUILTIN_PSLLDQI128,
21598 IX86_BUILTIN_PSLLWI128,
21599 IX86_BUILTIN_PSLLDI128,
21600 IX86_BUILTIN_PSLLQI128,
21601 IX86_BUILTIN_PSRAWI128,
21602 IX86_BUILTIN_PSRADI128,
21603 IX86_BUILTIN_PSRLDQI128,
21604 IX86_BUILTIN_PSRLWI128,
21605 IX86_BUILTIN_PSRLDI128,
21606 IX86_BUILTIN_PSRLQI128,
21608 IX86_BUILTIN_PSLLDQ128,
21609 IX86_BUILTIN_PSLLW128,
21610 IX86_BUILTIN_PSLLD128,
21611 IX86_BUILTIN_PSLLQ128,
21612 IX86_BUILTIN_PSRAW128,
21613 IX86_BUILTIN_PSRAD128,
21614 IX86_BUILTIN_PSRLW128,
21615 IX86_BUILTIN_PSRLD128,
21616 IX86_BUILTIN_PSRLQ128,
21618 IX86_BUILTIN_PUNPCKHBW128,
21619 IX86_BUILTIN_PUNPCKHWD128,
21620 IX86_BUILTIN_PUNPCKHDQ128,
21621 IX86_BUILTIN_PUNPCKHQDQ128,
21622 IX86_BUILTIN_PUNPCKLBW128,
21623 IX86_BUILTIN_PUNPCKLWD128,
21624 IX86_BUILTIN_PUNPCKLDQ128,
21625 IX86_BUILTIN_PUNPCKLQDQ128,
21627 IX86_BUILTIN_CLFLUSH,
21628 IX86_BUILTIN_MFENCE,
21629 IX86_BUILTIN_LFENCE,
21631 IX86_BUILTIN_BSRSI,
21632 IX86_BUILTIN_BSRDI,
21633 IX86_BUILTIN_RDPMC,
21634 IX86_BUILTIN_RDTSC,
21635 IX86_BUILTIN_RDTSCP,
21636 IX86_BUILTIN_ROLQI,
21637 IX86_BUILTIN_ROLHI,
21638 IX86_BUILTIN_RORQI,
21639 IX86_BUILTIN_RORHI,
21642 IX86_BUILTIN_ADDSUBPS,
21643 IX86_BUILTIN_HADDPS,
21644 IX86_BUILTIN_HSUBPS,
21645 IX86_BUILTIN_MOVSHDUP,
21646 IX86_BUILTIN_MOVSLDUP,
21647 IX86_BUILTIN_ADDSUBPD,
21648 IX86_BUILTIN_HADDPD,
21649 IX86_BUILTIN_HSUBPD,
21650 IX86_BUILTIN_LDDQU,
21652 IX86_BUILTIN_MONITOR,
21653 IX86_BUILTIN_MWAIT,
21656 IX86_BUILTIN_PHADDW,
21657 IX86_BUILTIN_PHADDD,
21658 IX86_BUILTIN_PHADDSW,
21659 IX86_BUILTIN_PHSUBW,
21660 IX86_BUILTIN_PHSUBD,
21661 IX86_BUILTIN_PHSUBSW,
21662 IX86_BUILTIN_PMADDUBSW,
21663 IX86_BUILTIN_PMULHRSW,
21664 IX86_BUILTIN_PSHUFB,
21665 IX86_BUILTIN_PSIGNB,
21666 IX86_BUILTIN_PSIGNW,
21667 IX86_BUILTIN_PSIGND,
21668 IX86_BUILTIN_PALIGNR,
21669 IX86_BUILTIN_PABSB,
21670 IX86_BUILTIN_PABSW,
21671 IX86_BUILTIN_PABSD,
21673 IX86_BUILTIN_PHADDW128,
21674 IX86_BUILTIN_PHADDD128,
21675 IX86_BUILTIN_PHADDSW128,
21676 IX86_BUILTIN_PHSUBW128,
21677 IX86_BUILTIN_PHSUBD128,
21678 IX86_BUILTIN_PHSUBSW128,
21679 IX86_BUILTIN_PMADDUBSW128,
21680 IX86_BUILTIN_PMULHRSW128,
21681 IX86_BUILTIN_PSHUFB128,
21682 IX86_BUILTIN_PSIGNB128,
21683 IX86_BUILTIN_PSIGNW128,
21684 IX86_BUILTIN_PSIGND128,
21685 IX86_BUILTIN_PALIGNR128,
21686 IX86_BUILTIN_PABSB128,
21687 IX86_BUILTIN_PABSW128,
21688 IX86_BUILTIN_PABSD128,
21690 /* AMDFAM10 - SSE4A New Instructions. */
21691 IX86_BUILTIN_MOVNTSD,
21692 IX86_BUILTIN_MOVNTSS,
21693 IX86_BUILTIN_EXTRQI,
21694 IX86_BUILTIN_EXTRQ,
21695 IX86_BUILTIN_INSERTQI,
21696 IX86_BUILTIN_INSERTQ,
21699 IX86_BUILTIN_BLENDPD,
21700 IX86_BUILTIN_BLENDPS,
21701 IX86_BUILTIN_BLENDVPD,
21702 IX86_BUILTIN_BLENDVPS,
21703 IX86_BUILTIN_PBLENDVB128,
21704 IX86_BUILTIN_PBLENDW128,
21709 IX86_BUILTIN_INSERTPS128,
21711 IX86_BUILTIN_MOVNTDQA,
21712 IX86_BUILTIN_MPSADBW128,
21713 IX86_BUILTIN_PACKUSDW128,
21714 IX86_BUILTIN_PCMPEQQ,
21715 IX86_BUILTIN_PHMINPOSUW128,
21717 IX86_BUILTIN_PMAXSB128,
21718 IX86_BUILTIN_PMAXSD128,
21719 IX86_BUILTIN_PMAXUD128,
21720 IX86_BUILTIN_PMAXUW128,
21722 IX86_BUILTIN_PMINSB128,
21723 IX86_BUILTIN_PMINSD128,
21724 IX86_BUILTIN_PMINUD128,
21725 IX86_BUILTIN_PMINUW128,
21727 IX86_BUILTIN_PMOVSXBW128,
21728 IX86_BUILTIN_PMOVSXBD128,
21729 IX86_BUILTIN_PMOVSXBQ128,
21730 IX86_BUILTIN_PMOVSXWD128,
21731 IX86_BUILTIN_PMOVSXWQ128,
21732 IX86_BUILTIN_PMOVSXDQ128,
21734 IX86_BUILTIN_PMOVZXBW128,
21735 IX86_BUILTIN_PMOVZXBD128,
21736 IX86_BUILTIN_PMOVZXBQ128,
21737 IX86_BUILTIN_PMOVZXWD128,
21738 IX86_BUILTIN_PMOVZXWQ128,
21739 IX86_BUILTIN_PMOVZXDQ128,
21741 IX86_BUILTIN_PMULDQ128,
21742 IX86_BUILTIN_PMULLD128,
21744 IX86_BUILTIN_ROUNDPD,
21745 IX86_BUILTIN_ROUNDPS,
21746 IX86_BUILTIN_ROUNDSD,
21747 IX86_BUILTIN_ROUNDSS,
21749 IX86_BUILTIN_PTESTZ,
21750 IX86_BUILTIN_PTESTC,
21751 IX86_BUILTIN_PTESTNZC,
21753 IX86_BUILTIN_VEC_INIT_V2SI,
21754 IX86_BUILTIN_VEC_INIT_V4HI,
21755 IX86_BUILTIN_VEC_INIT_V8QI,
21756 IX86_BUILTIN_VEC_EXT_V2DF,
21757 IX86_BUILTIN_VEC_EXT_V2DI,
21758 IX86_BUILTIN_VEC_EXT_V4SF,
21759 IX86_BUILTIN_VEC_EXT_V4SI,
21760 IX86_BUILTIN_VEC_EXT_V8HI,
21761 IX86_BUILTIN_VEC_EXT_V2SI,
21762 IX86_BUILTIN_VEC_EXT_V4HI,
21763 IX86_BUILTIN_VEC_EXT_V16QI,
21764 IX86_BUILTIN_VEC_SET_V2DI,
21765 IX86_BUILTIN_VEC_SET_V4SF,
21766 IX86_BUILTIN_VEC_SET_V4SI,
21767 IX86_BUILTIN_VEC_SET_V8HI,
21768 IX86_BUILTIN_VEC_SET_V4HI,
21769 IX86_BUILTIN_VEC_SET_V16QI,
21771 IX86_BUILTIN_VEC_PACK_SFIX,
21774 IX86_BUILTIN_CRC32QI,
21775 IX86_BUILTIN_CRC32HI,
21776 IX86_BUILTIN_CRC32SI,
21777 IX86_BUILTIN_CRC32DI,
21779 IX86_BUILTIN_PCMPESTRI128,
21780 IX86_BUILTIN_PCMPESTRM128,
21781 IX86_BUILTIN_PCMPESTRA128,
21782 IX86_BUILTIN_PCMPESTRC128,
21783 IX86_BUILTIN_PCMPESTRO128,
21784 IX86_BUILTIN_PCMPESTRS128,
21785 IX86_BUILTIN_PCMPESTRZ128,
21786 IX86_BUILTIN_PCMPISTRI128,
21787 IX86_BUILTIN_PCMPISTRM128,
21788 IX86_BUILTIN_PCMPISTRA128,
21789 IX86_BUILTIN_PCMPISTRC128,
21790 IX86_BUILTIN_PCMPISTRO128,
21791 IX86_BUILTIN_PCMPISTRS128,
21792 IX86_BUILTIN_PCMPISTRZ128,
21794 IX86_BUILTIN_PCMPGTQ,
21796 /* AES instructions */
21797 IX86_BUILTIN_AESENC128,
21798 IX86_BUILTIN_AESENCLAST128,
21799 IX86_BUILTIN_AESDEC128,
21800 IX86_BUILTIN_AESDECLAST128,
21801 IX86_BUILTIN_AESIMC128,
21802 IX86_BUILTIN_AESKEYGENASSIST128,
21804 /* PCLMUL instruction */
21805 IX86_BUILTIN_PCLMULQDQ128,
21808 IX86_BUILTIN_ADDPD256,
21809 IX86_BUILTIN_ADDPS256,
21810 IX86_BUILTIN_ADDSUBPD256,
21811 IX86_BUILTIN_ADDSUBPS256,
21812 IX86_BUILTIN_ANDPD256,
21813 IX86_BUILTIN_ANDPS256,
21814 IX86_BUILTIN_ANDNPD256,
21815 IX86_BUILTIN_ANDNPS256,
21816 IX86_BUILTIN_BLENDPD256,
21817 IX86_BUILTIN_BLENDPS256,
21818 IX86_BUILTIN_BLENDVPD256,
21819 IX86_BUILTIN_BLENDVPS256,
21820 IX86_BUILTIN_DIVPD256,
21821 IX86_BUILTIN_DIVPS256,
21822 IX86_BUILTIN_DPPS256,
21823 IX86_BUILTIN_HADDPD256,
21824 IX86_BUILTIN_HADDPS256,
21825 IX86_BUILTIN_HSUBPD256,
21826 IX86_BUILTIN_HSUBPS256,
21827 IX86_BUILTIN_MAXPD256,
21828 IX86_BUILTIN_MAXPS256,
21829 IX86_BUILTIN_MINPD256,
21830 IX86_BUILTIN_MINPS256,
21831 IX86_BUILTIN_MULPD256,
21832 IX86_BUILTIN_MULPS256,
21833 IX86_BUILTIN_ORPD256,
21834 IX86_BUILTIN_ORPS256,
21835 IX86_BUILTIN_SHUFPD256,
21836 IX86_BUILTIN_SHUFPS256,
21837 IX86_BUILTIN_SUBPD256,
21838 IX86_BUILTIN_SUBPS256,
21839 IX86_BUILTIN_XORPD256,
21840 IX86_BUILTIN_XORPS256,
21841 IX86_BUILTIN_CMPSD,
21842 IX86_BUILTIN_CMPSS,
21843 IX86_BUILTIN_CMPPD,
21844 IX86_BUILTIN_CMPPS,
21845 IX86_BUILTIN_CMPPD256,
21846 IX86_BUILTIN_CMPPS256,
21847 IX86_BUILTIN_CVTDQ2PD256,
21848 IX86_BUILTIN_CVTDQ2PS256,
21849 IX86_BUILTIN_CVTPD2PS256,
21850 IX86_BUILTIN_CVTPS2DQ256,
21851 IX86_BUILTIN_CVTPS2PD256,
21852 IX86_BUILTIN_CVTTPD2DQ256,
21853 IX86_BUILTIN_CVTPD2DQ256,
21854 IX86_BUILTIN_CVTTPS2DQ256,
21855 IX86_BUILTIN_EXTRACTF128PD256,
21856 IX86_BUILTIN_EXTRACTF128PS256,
21857 IX86_BUILTIN_EXTRACTF128SI256,
21858 IX86_BUILTIN_VZEROALL,
21859 IX86_BUILTIN_VZEROUPPER,
21860 IX86_BUILTIN_VPERMILVARPD,
21861 IX86_BUILTIN_VPERMILVARPS,
21862 IX86_BUILTIN_VPERMILVARPD256,
21863 IX86_BUILTIN_VPERMILVARPS256,
21864 IX86_BUILTIN_VPERMILPD,
21865 IX86_BUILTIN_VPERMILPS,
21866 IX86_BUILTIN_VPERMILPD256,
21867 IX86_BUILTIN_VPERMILPS256,
21868 IX86_BUILTIN_VPERMIL2PD,
21869 IX86_BUILTIN_VPERMIL2PS,
21870 IX86_BUILTIN_VPERMIL2PD256,
21871 IX86_BUILTIN_VPERMIL2PS256,
21872 IX86_BUILTIN_VPERM2F128PD256,
21873 IX86_BUILTIN_VPERM2F128PS256,
21874 IX86_BUILTIN_VPERM2F128SI256,
21875 IX86_BUILTIN_VBROADCASTSS,
21876 IX86_BUILTIN_VBROADCASTSD256,
21877 IX86_BUILTIN_VBROADCASTSS256,
21878 IX86_BUILTIN_VBROADCASTPD256,
21879 IX86_BUILTIN_VBROADCASTPS256,
21880 IX86_BUILTIN_VINSERTF128PD256,
21881 IX86_BUILTIN_VINSERTF128PS256,
21882 IX86_BUILTIN_VINSERTF128SI256,
21883 IX86_BUILTIN_LOADUPD256,
21884 IX86_BUILTIN_LOADUPS256,
21885 IX86_BUILTIN_STOREUPD256,
21886 IX86_BUILTIN_STOREUPS256,
21887 IX86_BUILTIN_LDDQU256,
21888 IX86_BUILTIN_MOVNTDQ256,
21889 IX86_BUILTIN_MOVNTPD256,
21890 IX86_BUILTIN_MOVNTPS256,
21891 IX86_BUILTIN_LOADDQU256,
21892 IX86_BUILTIN_STOREDQU256,
21893 IX86_BUILTIN_MASKLOADPD,
21894 IX86_BUILTIN_MASKLOADPS,
21895 IX86_BUILTIN_MASKSTOREPD,
21896 IX86_BUILTIN_MASKSTOREPS,
21897 IX86_BUILTIN_MASKLOADPD256,
21898 IX86_BUILTIN_MASKLOADPS256,
21899 IX86_BUILTIN_MASKSTOREPD256,
21900 IX86_BUILTIN_MASKSTOREPS256,
21901 IX86_BUILTIN_MOVSHDUP256,
21902 IX86_BUILTIN_MOVSLDUP256,
21903 IX86_BUILTIN_MOVDDUP256,
21905 IX86_BUILTIN_SQRTPD256,
21906 IX86_BUILTIN_SQRTPS256,
21907 IX86_BUILTIN_SQRTPS_NR256,
21908 IX86_BUILTIN_RSQRTPS256,
21909 IX86_BUILTIN_RSQRTPS_NR256,
21911 IX86_BUILTIN_RCPPS256,
21913 IX86_BUILTIN_ROUNDPD256,
21914 IX86_BUILTIN_ROUNDPS256,
21916 IX86_BUILTIN_UNPCKHPD256,
21917 IX86_BUILTIN_UNPCKLPD256,
21918 IX86_BUILTIN_UNPCKHPS256,
21919 IX86_BUILTIN_UNPCKLPS256,
21921 IX86_BUILTIN_SI256_SI,
21922 IX86_BUILTIN_PS256_PS,
21923 IX86_BUILTIN_PD256_PD,
21924 IX86_BUILTIN_SI_SI256,
21925 IX86_BUILTIN_PS_PS256,
21926 IX86_BUILTIN_PD_PD256,
21928 IX86_BUILTIN_VTESTZPD,
21929 IX86_BUILTIN_VTESTCPD,
21930 IX86_BUILTIN_VTESTNZCPD,
21931 IX86_BUILTIN_VTESTZPS,
21932 IX86_BUILTIN_VTESTCPS,
21933 IX86_BUILTIN_VTESTNZCPS,
21934 IX86_BUILTIN_VTESTZPD256,
21935 IX86_BUILTIN_VTESTCPD256,
21936 IX86_BUILTIN_VTESTNZCPD256,
21937 IX86_BUILTIN_VTESTZPS256,
21938 IX86_BUILTIN_VTESTCPS256,
21939 IX86_BUILTIN_VTESTNZCPS256,
21940 IX86_BUILTIN_PTESTZ256,
21941 IX86_BUILTIN_PTESTC256,
21942 IX86_BUILTIN_PTESTNZC256,
21944 IX86_BUILTIN_MOVMSKPD256,
21945 IX86_BUILTIN_MOVMSKPS256,
21947 /* TFmode support builtins. */
21949 IX86_BUILTIN_HUGE_VALQ,
21950 IX86_BUILTIN_FABSQ,
21951 IX86_BUILTIN_COPYSIGNQ,
21953 /* Vectorizer support builtins. */
21954 IX86_BUILTIN_CPYSGNPS,
21955 IX86_BUILTIN_CPYSGNPD,
21957 IX86_BUILTIN_CVTUDQ2PS,
21959 IX86_BUILTIN_VEC_PERM_V2DF,
21960 IX86_BUILTIN_VEC_PERM_V4SF,
21961 IX86_BUILTIN_VEC_PERM_V2DI,
21962 IX86_BUILTIN_VEC_PERM_V4SI,
21963 IX86_BUILTIN_VEC_PERM_V8HI,
21964 IX86_BUILTIN_VEC_PERM_V16QI,
21965 IX86_BUILTIN_VEC_PERM_V2DI_U,
21966 IX86_BUILTIN_VEC_PERM_V4SI_U,
21967 IX86_BUILTIN_VEC_PERM_V8HI_U,
21968 IX86_BUILTIN_VEC_PERM_V16QI_U,
21969 IX86_BUILTIN_VEC_PERM_V4DF,
21970 IX86_BUILTIN_VEC_PERM_V8SF,
21972 /* FMA4 and XOP instructions. */
21973 IX86_BUILTIN_VFMADDSS,
21974 IX86_BUILTIN_VFMADDSD,
21975 IX86_BUILTIN_VFMADDPS,
21976 IX86_BUILTIN_VFMADDPD,
21977 IX86_BUILTIN_VFMSUBSS,
21978 IX86_BUILTIN_VFMSUBSD,
21979 IX86_BUILTIN_VFMSUBPS,
21980 IX86_BUILTIN_VFMSUBPD,
21981 IX86_BUILTIN_VFMADDSUBPS,
21982 IX86_BUILTIN_VFMADDSUBPD,
21983 IX86_BUILTIN_VFMSUBADDPS,
21984 IX86_BUILTIN_VFMSUBADDPD,
21985 IX86_BUILTIN_VFNMADDSS,
21986 IX86_BUILTIN_VFNMADDSD,
21987 IX86_BUILTIN_VFNMADDPS,
21988 IX86_BUILTIN_VFNMADDPD,
21989 IX86_BUILTIN_VFNMSUBSS,
21990 IX86_BUILTIN_VFNMSUBSD,
21991 IX86_BUILTIN_VFNMSUBPS,
21992 IX86_BUILTIN_VFNMSUBPD,
21993 IX86_BUILTIN_VFMADDPS256,
21994 IX86_BUILTIN_VFMADDPD256,
21995 IX86_BUILTIN_VFMSUBPS256,
21996 IX86_BUILTIN_VFMSUBPD256,
21997 IX86_BUILTIN_VFMADDSUBPS256,
21998 IX86_BUILTIN_VFMADDSUBPD256,
21999 IX86_BUILTIN_VFMSUBADDPS256,
22000 IX86_BUILTIN_VFMSUBADDPD256,
22001 IX86_BUILTIN_VFNMADDPS256,
22002 IX86_BUILTIN_VFNMADDPD256,
22003 IX86_BUILTIN_VFNMSUBPS256,
22004 IX86_BUILTIN_VFNMSUBPD256,
22006 IX86_BUILTIN_VPCMOV,
22007 IX86_BUILTIN_VPCMOV_V2DI,
22008 IX86_BUILTIN_VPCMOV_V4SI,
22009 IX86_BUILTIN_VPCMOV_V8HI,
22010 IX86_BUILTIN_VPCMOV_V16QI,
22011 IX86_BUILTIN_VPCMOV_V4SF,
22012 IX86_BUILTIN_VPCMOV_V2DF,
22013 IX86_BUILTIN_VPCMOV256,
22014 IX86_BUILTIN_VPCMOV_V4DI256,
22015 IX86_BUILTIN_VPCMOV_V8SI256,
22016 IX86_BUILTIN_VPCMOV_V16HI256,
22017 IX86_BUILTIN_VPCMOV_V32QI256,
22018 IX86_BUILTIN_VPCMOV_V8SF256,
22019 IX86_BUILTIN_VPCMOV_V4DF256,
22021 IX86_BUILTIN_VPPERM,
22023 IX86_BUILTIN_VPMACSSWW,
22024 IX86_BUILTIN_VPMACSWW,
22025 IX86_BUILTIN_VPMACSSWD,
22026 IX86_BUILTIN_VPMACSWD,
22027 IX86_BUILTIN_VPMACSSDD,
22028 IX86_BUILTIN_VPMACSDD,
22029 IX86_BUILTIN_VPMACSSDQL,
22030 IX86_BUILTIN_VPMACSSDQH,
22031 IX86_BUILTIN_VPMACSDQL,
22032 IX86_BUILTIN_VPMACSDQH,
22033 IX86_BUILTIN_VPMADCSSWD,
22034 IX86_BUILTIN_VPMADCSWD,
22036 IX86_BUILTIN_VPHADDBW,
22037 IX86_BUILTIN_VPHADDBD,
22038 IX86_BUILTIN_VPHADDBQ,
22039 IX86_BUILTIN_VPHADDWD,
22040 IX86_BUILTIN_VPHADDWQ,
22041 IX86_BUILTIN_VPHADDDQ,
22042 IX86_BUILTIN_VPHADDUBW,
22043 IX86_BUILTIN_VPHADDUBD,
22044 IX86_BUILTIN_VPHADDUBQ,
22045 IX86_BUILTIN_VPHADDUWD,
22046 IX86_BUILTIN_VPHADDUWQ,
22047 IX86_BUILTIN_VPHADDUDQ,
22048 IX86_BUILTIN_VPHSUBBW,
22049 IX86_BUILTIN_VPHSUBWD,
22050 IX86_BUILTIN_VPHSUBDQ,
22052 IX86_BUILTIN_VPROTB,
22053 IX86_BUILTIN_VPROTW,
22054 IX86_BUILTIN_VPROTD,
22055 IX86_BUILTIN_VPROTQ,
22056 IX86_BUILTIN_VPROTB_IMM,
22057 IX86_BUILTIN_VPROTW_IMM,
22058 IX86_BUILTIN_VPROTD_IMM,
22059 IX86_BUILTIN_VPROTQ_IMM,
22061 IX86_BUILTIN_VPSHLB,
22062 IX86_BUILTIN_VPSHLW,
22063 IX86_BUILTIN_VPSHLD,
22064 IX86_BUILTIN_VPSHLQ,
22065 IX86_BUILTIN_VPSHAB,
22066 IX86_BUILTIN_VPSHAW,
22067 IX86_BUILTIN_VPSHAD,
22068 IX86_BUILTIN_VPSHAQ,
22070 IX86_BUILTIN_VFRCZSS,
22071 IX86_BUILTIN_VFRCZSD,
22072 IX86_BUILTIN_VFRCZPS,
22073 IX86_BUILTIN_VFRCZPD,
22074 IX86_BUILTIN_VFRCZPS256,
22075 IX86_BUILTIN_VFRCZPD256,
22077 IX86_BUILTIN_VPCOMEQUB,
22078 IX86_BUILTIN_VPCOMNEUB,
22079 IX86_BUILTIN_VPCOMLTUB,
22080 IX86_BUILTIN_VPCOMLEUB,
22081 IX86_BUILTIN_VPCOMGTUB,
22082 IX86_BUILTIN_VPCOMGEUB,
22083 IX86_BUILTIN_VPCOMFALSEUB,
22084 IX86_BUILTIN_VPCOMTRUEUB,
22086 IX86_BUILTIN_VPCOMEQUW,
22087 IX86_BUILTIN_VPCOMNEUW,
22088 IX86_BUILTIN_VPCOMLTUW,
22089 IX86_BUILTIN_VPCOMLEUW,
22090 IX86_BUILTIN_VPCOMGTUW,
22091 IX86_BUILTIN_VPCOMGEUW,
22092 IX86_BUILTIN_VPCOMFALSEUW,
22093 IX86_BUILTIN_VPCOMTRUEUW,
22095 IX86_BUILTIN_VPCOMEQUD,
22096 IX86_BUILTIN_VPCOMNEUD,
22097 IX86_BUILTIN_VPCOMLTUD,
22098 IX86_BUILTIN_VPCOMLEUD,
22099 IX86_BUILTIN_VPCOMGTUD,
22100 IX86_BUILTIN_VPCOMGEUD,
22101 IX86_BUILTIN_VPCOMFALSEUD,
22102 IX86_BUILTIN_VPCOMTRUEUD,
22104 IX86_BUILTIN_VPCOMEQUQ,
22105 IX86_BUILTIN_VPCOMNEUQ,
22106 IX86_BUILTIN_VPCOMLTUQ,
22107 IX86_BUILTIN_VPCOMLEUQ,
22108 IX86_BUILTIN_VPCOMGTUQ,
22109 IX86_BUILTIN_VPCOMGEUQ,
22110 IX86_BUILTIN_VPCOMFALSEUQ,
22111 IX86_BUILTIN_VPCOMTRUEUQ,
22113 IX86_BUILTIN_VPCOMEQB,
22114 IX86_BUILTIN_VPCOMNEB,
22115 IX86_BUILTIN_VPCOMLTB,
22116 IX86_BUILTIN_VPCOMLEB,
22117 IX86_BUILTIN_VPCOMGTB,
22118 IX86_BUILTIN_VPCOMGEB,
22119 IX86_BUILTIN_VPCOMFALSEB,
22120 IX86_BUILTIN_VPCOMTRUEB,
22122 IX86_BUILTIN_VPCOMEQW,
22123 IX86_BUILTIN_VPCOMNEW,
22124 IX86_BUILTIN_VPCOMLTW,
22125 IX86_BUILTIN_VPCOMLEW,
22126 IX86_BUILTIN_VPCOMGTW,
22127 IX86_BUILTIN_VPCOMGEW,
22128 IX86_BUILTIN_VPCOMFALSEW,
22129 IX86_BUILTIN_VPCOMTRUEW,
22131 IX86_BUILTIN_VPCOMEQD,
22132 IX86_BUILTIN_VPCOMNED,
22133 IX86_BUILTIN_VPCOMLTD,
22134 IX86_BUILTIN_VPCOMLED,
22135 IX86_BUILTIN_VPCOMGTD,
22136 IX86_BUILTIN_VPCOMGED,
22137 IX86_BUILTIN_VPCOMFALSED,
22138 IX86_BUILTIN_VPCOMTRUED,
22140 IX86_BUILTIN_VPCOMEQQ,
22141 IX86_BUILTIN_VPCOMNEQ,
22142 IX86_BUILTIN_VPCOMLTQ,
22143 IX86_BUILTIN_VPCOMLEQ,
22144 IX86_BUILTIN_VPCOMGTQ,
22145 IX86_BUILTIN_VPCOMGEQ,
22146 IX86_BUILTIN_VPCOMFALSEQ,
22147 IX86_BUILTIN_VPCOMTRUEQ,
22149 /* LWP instructions. */
22150 IX86_BUILTIN_LLWPCB,
22151 IX86_BUILTIN_SLWPCB,
22152 IX86_BUILTIN_LWPVAL32,
22153 IX86_BUILTIN_LWPVAL64,
22154 IX86_BUILTIN_LWPINS32,
22155   IX86_BUILTIN_LWPINS64,
22157   IX86_BUILTIN_CLZS,
22159   /* FSGSBASE instructions.  */
22160 IX86_BUILTIN_RDFSBASE32,
22161 IX86_BUILTIN_RDFSBASE64,
22162 IX86_BUILTIN_RDGSBASE32,
22163 IX86_BUILTIN_RDGSBASE64,
22164 IX86_BUILTIN_WRFSBASE32,
22165 IX86_BUILTIN_WRFSBASE64,
22166 IX86_BUILTIN_WRGSBASE32,
22167 IX86_BUILTIN_WRGSBASE64,
22169 /* RDRND instructions. */
22170 IX86_BUILTIN_RDRAND16,
22171 IX86_BUILTIN_RDRAND32,
22172 IX86_BUILTIN_RDRAND64,
22174 /* F16C instructions. */
22175 IX86_BUILTIN_CVTPH2PS,
22176 IX86_BUILTIN_CVTPH2PS256,
22177 IX86_BUILTIN_CVTPS2PH,
22178   IX86_BUILTIN_CVTPS2PH256,
22180   IX86_BUILTIN_MAX
22181 };
22183 /* Table for the ix86 builtin decls.  */
22184 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22186 /* Table of all of the builtin functions that are possible with different ISA's
22187    but are waiting to be built until a function is declared to use that
22188    ISA.  */
22189 struct builtin_isa {
22190 const char *name; /* function name */
22191 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22192 int isa; /* isa_flags this builtin is defined for */
22193 bool const_p; /* true if the declaration is constant */
22194   bool set_and_not_built_p;
22195 };
22197 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22200 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
22201 of which isa_flags to use in the ix86_builtins_isa array. Stores the
22202 function decl in the ix86_builtins array. Returns the function decl or
22203 NULL_TREE, if the builtin was not added.
22205 If the front end has a special hook for builtin functions, delay adding
22206 builtin functions that aren't in the current ISA until the ISA is changed
22207    with function specific optimization.  Doing so can save about 300K for the
22208    default compiler.  When the builtin is expanded, check at that time whether
22209    it is valid.
22211    If the front end doesn't have a special hook, record all builtins, even if
22212 it isn't an instruction set in the current ISA in case the user uses
22213 function specific options for a different ISA, so that we don't get scope
22214 errors if a builtin is added in the middle of a function scope. */
22216 static inline tree
22217 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22218 	     enum ix86_builtins code)
22219 {
22220   tree decl = NULL_TREE;
22222   if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22223     {
22224       ix86_builtins_isa[(int) code].isa = mask;
22226       mask &= ~OPTION_MASK_ISA_64BIT;
22227       if (mask == 0
22228 	  || (mask & ix86_isa_flags) != 0
22229 || (lang_hooks.builtin_function
22230 	      == lang_hooks.builtin_function_ext_scope))
22232 	{
22233 	  tree type = ix86_get_builtin_func_type (tcode);
22234 	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22235 				       NULL, NULL_TREE);
22236 ix86_builtins[(int) code] = decl;
22237 	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22238 	}
22239       else
22240 	{
22241 	  ix86_builtins[(int) code] = NULL_TREE;
22242 ix86_builtins_isa[(int) code].tcode = tcode;
22243 ix86_builtins_isa[(int) code].name = name;
22244 ix86_builtins_isa[(int) code].const_p = false;
22245 	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22246 	}
22247     }
22249   return decl;
22250 }
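/* Illustrative sketch (editorial addition, not from the original source;
   the builtin name and enumerator below are placeholders): a caller
   registers a builtin as

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  INT_FTYPE_V4SF, IX86_BUILTIN_EXAMPLE);

   If SSE2 is already enabled, or the front end supports ext-scope
   builtins, the decl is created immediately; otherwise the request is
   parked in ix86_builtins_isa until ix86_add_new_builtins runs.  */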
22252 /* Like def_builtin, but also marks the function decl "const". */
22254 static inline tree
22255 def_builtin_const (int mask, const char *name,
22256 		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22257 {
22258   tree decl = def_builtin (mask, name, tcode, code);
22259   if (decl)
22260     TREE_READONLY (decl) = 1;
22261   else
22262     ix86_builtins_isa[(int) code].const_p = true;
22264   return decl;
22265 }
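/* Sketch of the expected call pattern (editorial; the actual registration
   loops appear later in this file): the bdesc_* tables below are walked
   entry by entry, roughly

     for (i = 0, d = bdesc_args; i < ARRAY_SIZE (bdesc_args); i++, d++)
       def_builtin_const (d->mask, d->name,
			  (enum ix86_builtin_func_type) d->flag, d->code);

   so the arithmetic builtins in those tables all end up marked const.  */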
22267 /* Add any new builtin functions for a given ISA that may not have been
22268 declared. This saves a bit of space compared to adding all of the
22269 declarations to the tree, even if we didn't use them. */
22271 static void
22272 ix86_add_new_builtins (int isa)
22273 {
22274   int i;
22276   for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22277     {
22278 if ((ix86_builtins_isa[i].isa & isa) != 0
22279 	  && ix86_builtins_isa[i].set_and_not_built_p)
22280 	{
22281 	  tree decl, type;
22283 	  /* Don't define the builtin again.  */
22284 ix86_builtins_isa[i].set_and_not_built_p = false;
22286 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22287 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
22288 						 type, i, BUILT_IN_MD, NULL,
22289 						 NULL_TREE);
22291 ix86_builtins[i] = decl;
22292 if (ix86_builtins_isa[i].const_p)
22293 	    TREE_READONLY (decl) = 1;
22294 	}
22295     }
22296 }
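/* Sketch (editorial): the expected trigger is the target-attribute path.
   When a function selects extra ISAs via __attribute__((target ("...")))
   the option machinery calls, roughly,

     ix86_add_new_builtins (ix86_isa_flags);

   so builtins parked by def_builtin become visible exactly when some
   function is allowed to use them.  */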
22298 /* Bits for builtin_description.flag. */
22300 /* Set when we don't support the comparison natively, and should
22301 swap_comparison in order to support it. */
22302 #define BUILTIN_DESC_SWAP_OPERANDS 1
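/* Worked example (editorial), taken from bdesc_args below:
   __builtin_ia32_cmpgtps is listed with rtx code LT and the
   V4SF_FTYPE_V4SF_V4SF_SWAP type, i.e. the unsupported a > b compare is
   emitted as b < a with the operands exchanged at expansion time.  */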
22304 struct builtin_description
22305 {
22306   const unsigned int mask;
22307 const enum insn_code icode;
22308 const char *const name;
22309 const enum ix86_builtins code;
22310   const enum rtx_code comparison;
22311   const int flag;
22312 };
22314 static const struct builtin_description bdesc_comi[] =
22315 {
22316   { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
22317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
22318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
22319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
22320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
22321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
22322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
22323 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
22324 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
22325 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
22326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
22327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
22328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
22329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
22330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
22331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
22332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
22333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
22334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
22335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
22336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
22337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
22338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
22339   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
22340 };
22342 static const struct builtin_description bdesc_pcmpestr[] =
22343 {
22344   /* SSE4.2 */
22345   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
22346 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
22347 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
22348 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
22349 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
22350 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
22351   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
22352 };
22354 static const struct builtin_description bdesc_pcmpistr[] =
22355 {
22356   /* SSE4.2 */
22357   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
22358 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
22359 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
22360 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
22361 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
22362 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
22363   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
22364 };
22366 /* Special builtins with variable number of arguments.  */
22367 static const struct builtin_description bdesc_special_args[] =
22368 {
22369 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
22370 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
22373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22376 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22384 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22388 /* SSE or 3DNow!A */
22389 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22390 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
22393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22394 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
22397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
22399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
22400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
22401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22407 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22410 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
22413 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22414 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22417 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
22418 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
22420 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22421 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22422 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22423 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
22424 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
22426 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22427 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22428 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22429 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22430 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22431 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
22432 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22434 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
22435 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22436 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22438 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
22439 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
22440 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
22441 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
22442 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
22443 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
22444 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
22445 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
22447 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
22448 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
22449 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
22450 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
22451 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
22452 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
22455 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22456 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22457 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22458 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22459 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22460 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22461 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22462 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22465 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
22466 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22467   { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22468 };
22470 /* Builtins with variable number of arguments.  */
22471 static const struct builtin_description bdesc_args[] =
22472 {
22473 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22474 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22475 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22476 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22477 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22478 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22479 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22485 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22486 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22487 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22489 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22490 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22491 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22492 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22493 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22494 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22495 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22496 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22498 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22499 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22501 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22502 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22503 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22504 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22506 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22507 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22508 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22509 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22510 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22511 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22513 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22514 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22515 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22516 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22517 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
22518 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
22520 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22521 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22522 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22524 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22526 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22527 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22528 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22529 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22530 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22531 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22533 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22534 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22535 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22536 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22537 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22538 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22540 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22541 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22542 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22543 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22546 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22547 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22548 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22549 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22551 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22552 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22553 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22554 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22555 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22556 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22557 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22558 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22559 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22560 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22561 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22562 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22563 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22564 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22565 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22568 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22569 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22570 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22571 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22572 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22573 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22578 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22579 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22580 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22582 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22583 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22584 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22585 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22587 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22589 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22591 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22592 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22593 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22594 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22595 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22596 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22597 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22598 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22600 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22601 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22602 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22603 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22604 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22605 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22606 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22607 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22608 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22609 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22610 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22611 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22612 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22613 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22614 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22615 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22616 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22617 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22618 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22619 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22620 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22621 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22623 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22624 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22625 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22626 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22628 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22629 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22630 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22631 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22633 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22635 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22636 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22637 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22638 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22639 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22641 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22642 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22643   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
22645 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22647 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22648 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22649 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22651 /* SSE MMX or 3Dnow!A */
22652 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22653 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22654 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22656 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22657 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22658 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22659 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22661 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22662 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22664 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22669 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22670 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22671 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22672 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22673 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22674 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22675 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22676 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22677 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22678 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22679 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22680 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22682 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22684 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22697 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22698 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22699 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22700 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22702 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
22704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22706 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22707 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22708 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22709 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
22720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22721 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22723 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22724 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22725 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
22726 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22727 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
22728 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
22729 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
22730 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22731 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
22732 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
22733 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
22734 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
22736 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22737 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22738 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22739 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22741 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22742 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22743 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22744 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22746 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22748 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22749 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22750 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
22752 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
22754 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22755 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22756 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22757 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22758 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22759 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22760 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22761 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
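
  /* Entries with a zero name string, such as the FABSQ and COPYSIGNQ
     rows above, are still expanded through this table; their
     user-visible names are registered separately (presumably in
     ix86_init_builtins), so only the mask, pattern and type fields
     matter here.  */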

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
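
  /* For the ptest builtins the comparison code (second-to-last field)
     selects which flag the result is read from: EQ tests ZF (ptestz),
     LTU tests CF (ptestc), and GTU tests that both are clear
     (ptestnzc).  The vtest entries further down use the same scheme.  */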

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
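
/* The MULTI_ARG_* names above are shorthand for the V*_FTYPE_*
   signatures used by bdesc_multi_arg below; e.g. MULTI_ARG_3_SF is a
   builtin taking three V4SFmode vectors and returning one.  A minimal
   user-level sketch (assuming -mfma4; the typedef stands in for the
   one from the intrinsic headers):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     fmadd_ps (__v4sf a, __v4sf b, __v4sf c)
     {
       return __builtin_ia32_vfmaddps (a, b, c);
     }  */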

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23344 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23345 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23346 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23347 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23349 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23350 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23351 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23352 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
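/* Editor's note (illustrative, not from the original source): each row above
   binds one user-visible builtin to an insn pattern and an argument shape.
   For example, the row for "__builtin_ia32_vpcomltb" names the insn
   CODE_FOR_xop_maskcmpv16qi3, the rtx comparison code LT, and the shape
   MULTI_ARG_2_QI_CMP; ix86_expand_multi_arg_builtin below uses exactly
   those three fields when expanding a call to that builtin.  */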
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, so that the user can compile particular
   modules with target-specific options that differ from the command-line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4 and XOP multi-argument instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
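/* Editor's note: def_builtin and def_builtin_const (defined earlier in this
   file) register the decl with the front end and record it, together with
   its ISA mask, in the ix86_builtins[] and ix86_builtins_isa[] tables;
   ix86_expand_builtin below consults that mask before expanding a call.  */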
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
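/* Editor's sketch of intended use (hypothetical user code; assumes the
   __builtin_ms_va_list type registered elsewhere in this port):

     void __attribute__ ((ms_abi))
     vsum (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ... __builtin_va_arg (ap, int) ...
       __builtin_ms_va_end (ap);
     }
*/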
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* long double is not the 80-bit extended format here, so build
	 __float80 as a distinct 80-bit REAL_TYPE.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
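/* Editor's note: once registered, these types are usable directly from C on
   x86 targets, e.g. "__float128 q = 1.0Q;" or "__float80 e;"; the TFmode
   builtins defined in ix86_init_builtins below operate on __float128.  */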
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand these to normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
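/* Editor's sketch: if erroneous source makes expand_normal return const0_rtx
   for what should be, say, a V4SFmode operand, then
   safe_vector_operand (op, V4SFmode) turns it into CONST0_RTX (V4SFmode),
   a well-formed zero vector that the insn predicates below accept.  */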
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);

  return target;
}
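/* Editor's sketch (assumed icode, hedged): a typical caller reaches here from
   ix86_expand_args_builtin with, e.g., icode CODE_FOR_addv8hi3 for
   __builtin_ia32_paddw128, emitting roughly
     (set (reg:V8HI target) (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))
   The SImode/TImode branch above loads an int argument into a vector
   register when the insn pattern wants a TImode operand.  */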
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i;
  int nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs - 1)
	{
	  if (!CONST_INT_P (op))
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
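/* Editor's sketch: for __builtin_ia32_vpcomltb (see the XOP table above)
   this routine is entered with sub_code LT and comparison_p true, so the
   case-2 branch builds cmp_op = (lt:V16QI op0 op1) and hands it to the
   xop_maskcmpv16qi3 pattern; __builtin_ia32_vpcomtrueb instead takes the
   tf_p path, passing PCOM_TRUE through as an integer operand.  */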
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
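/* Editor's note: the STRICT_LOW_PART sequence above materializes one flag of
   the comi/ucomi FLAGS_REG result as a 0/1 int; e.g. __builtin_ia32_comilt
   (behind _mm_comilt_ss) compares two V4SF operands and returns the LT
   condition.  */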
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
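/* Editor's note: of the pcmpestr family, __builtin_ia32_pcmpestri128
   returns the index result directly (first output), and
   __builtin_ia32_pcmpestrm128 returns the mask (second output); the
   remaining flag variants (d->flag nonzero) read a single condition out of
   FLAGS_REG via the STRICT_LOW_PART sequence above.  */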
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V2DImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_cmpsdv2df3:
	      case CODE_FOR_avx_cmpssv4sf3:
	      case CODE_FOR_avx_cmppdv2df3:
	      case CODE_FOR_avx_cmppsv4sf3:
	      case CODE_FOR_avx_cmppdv4df3:
	      case CODE_FOR_avx_cmppsv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		    /* FALLTHRU */
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
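/* Editor's sketch: the immediate checks above key off the insn code; e.g.
   __builtin_ia32_roundsd (a, b, 4) passes, while a non-constant or
   out-of-range last argument trips the CODE_FOR_sse4_1_roundsd arm and
   reports "the last argument must be a 4-bit immediate".  */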
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
    case UINT16_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SF:
    case V4DF_FTYPE_PCV4DF_V4DF:
    case V4SF_FTYPE_PCV4SF_V4SF:
    case V2DF_FTYPE_PCV2DF_V2DF:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SF_V8SF:
    case VOID_FTYPE_PV4DF_V4DF_V4DF:
    case VOID_FTYPE_PV4SF_V4SF_V4SF:
    case VOID_FTYPE_PV2DF_V2DF_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be a register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
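/* Editor's note: mmintrin.h routes the MMX initializers through these
   builtins; e.g. _mm_set_pi32 (hi, lo) expands to
   __builtin_ia32_vec_init_v2si (lo, hi), which arrives here with a V2SI
   result type and two int arguments.  */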
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  int elt;
  rtx op0;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  int elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it, and return it as the target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
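/* Editor's note: e.g. _mm_insert_epi16 in the intrinsics headers maps onto
   __builtin_ia32_vec_set_v8hi (vec, val, ndx); the copy through a fresh
   TARGET above keeps the builtin purely functional, as the comment says.  */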
25007 /* Expand an expression EXP that calls a built-in function,
25008 with result going to TARGET if that's convenient
25009 (and in mode MODE if that's convenient).
25010 SUBTARGET may be used as the target for computing one of EXP's operands.
25011 IGNORE is nonzero if the value is to be ignored. */
25014 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25015 enum machine_mode mode ATTRIBUTE_UNUSED,
25016 int ignore ATTRIBUTE_UNUSED)
25018 const struct builtin_description *d;
25020 enum insn_code icode;
25021 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25022 tree arg0, arg1, arg2;
25023 rtx op0, op1, op2, pat;
25024 enum machine_mode mode0, mode1, mode2;
25025 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25027 /* Determine whether the builtin function is available under the current ISA.
25028 Originally the builtin was not created if it wasn't applicable to the
25029 current ISA based on the command line switches. With function specific
25030 options, we need to check in the context of the function making the call
25031 whether it is supported. */
25032 if (ix86_builtins_isa[fcode].isa
25033 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25035 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25036 NULL, NULL, false);
25039 error ("%qE needs unknown isa option", fndecl);
25042 gcc_assert (opts != NULL);
25043 error ("%qE needs isa option %s", fndecl, opts);
25051 case IX86_BUILTIN_MASKMOVQ:
25052 case IX86_BUILTIN_MASKMOVDQU:
25053 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25054 ? CODE_FOR_mmx_maskmovq
25055 : CODE_FOR_sse2_maskmovdqu);
25056 /* Note the arg order is different from the operand order. */
25057 arg1 = CALL_EXPR_ARG (exp, 0);
25058 arg2 = CALL_EXPR_ARG (exp, 1);
25059 arg0 = CALL_EXPR_ARG (exp, 2);
25060 op0 = expand_normal (arg0);
25061 op1 = expand_normal (arg1);
25062 op2 = expand_normal (arg2);
25063 mode0 = insn_data[icode].operand[0].mode;
25064 mode1 = insn_data[icode].operand[1].mode;
25065 mode2 = insn_data[icode].operand[2].mode;
25067 op0 = force_reg (Pmode, op0);
25068 op0 = gen_rtx_MEM (mode1, op0);
25070 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25071 op0 = copy_to_mode_reg (mode0, op0);
25072 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25073 op1 = copy_to_mode_reg (mode1, op1);
25074 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25075 op2 = copy_to_mode_reg (mode2, op2);
25076 pat = GEN_FCN (icode) (op0, op1, op2);
25082 case IX86_BUILTIN_LDMXCSR:
25083 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25084 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25085 emit_move_insn (target, op0);
25086 emit_insn (gen_sse_ldmxcsr (target));
25089 case IX86_BUILTIN_STMXCSR:
25090 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25091 emit_insn (gen_sse_stmxcsr (target));
25092 return copy_to_mode_reg (SImode, target);
25094 case IX86_BUILTIN_CLFLUSH:
25095 arg0 = CALL_EXPR_ARG (exp, 0);
25096 op0 = expand_normal (arg0);
25097 icode = CODE_FOR_sse2_clflush;
25098 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25099 op0 = copy_to_mode_reg (Pmode, op0);
25101 emit_insn (gen_sse2_clflush (op0));
25104 case IX86_BUILTIN_MONITOR:
25105 arg0 = CALL_EXPR_ARG (exp, 0);
25106 arg1 = CALL_EXPR_ARG (exp, 1);
25107 arg2 = CALL_EXPR_ARG (exp, 2);
25108 op0 = expand_normal (arg0);
25109 op1 = expand_normal (arg1);
25110 op2 = expand_normal (arg2);
25112 op0 = copy_to_mode_reg (Pmode, op0);
25114 op1 = copy_to_mode_reg (SImode, op1);
25116 op2 = copy_to_mode_reg (SImode, op2);
25117 emit_insn (ix86_gen_monitor (op0, op1, op2));
25120 case IX86_BUILTIN_MWAIT:
25121 arg0 = CALL_EXPR_ARG (exp, 0);
25122 arg1 = CALL_EXPR_ARG (exp, 1);
25123 op0 = expand_normal (arg0);
25124 op1 = expand_normal (arg1);
25126 op0 = copy_to_mode_reg (SImode, op0);
25128 op1 = copy_to_mode_reg (SImode, op1);
25129 emit_insn (gen_sse3_mwait (op0, op1));
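/* For illustration: pmmintrin.h wraps these two builtins as
   _mm_monitor (void const *__P, unsigned int __E, unsigned int __H)
   and _mm_mwait (unsigned int __E, unsigned int __H).  The fresh
   register copies above let the monitor/mwait insn patterns constrain
   the operands to the fixed registers the encodings expect
   (%eax/%ecx/%edx for monitor, %eax/%ecx for mwait).  */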
25132 case IX86_BUILTIN_VEC_INIT_V2SI:
25133 case IX86_BUILTIN_VEC_INIT_V4HI:
25134 case IX86_BUILTIN_VEC_INIT_V8QI:
25135 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25137 case IX86_BUILTIN_VEC_EXT_V2DF:
25138 case IX86_BUILTIN_VEC_EXT_V2DI:
25139 case IX86_BUILTIN_VEC_EXT_V4SF:
25140 case IX86_BUILTIN_VEC_EXT_V4SI:
25141 case IX86_BUILTIN_VEC_EXT_V8HI:
25142 case IX86_BUILTIN_VEC_EXT_V2SI:
25143 case IX86_BUILTIN_VEC_EXT_V4HI:
25144 case IX86_BUILTIN_VEC_EXT_V16QI:
25145 return ix86_expand_vec_ext_builtin (exp, target);
25147 case IX86_BUILTIN_VEC_SET_V2DI:
25148 case IX86_BUILTIN_VEC_SET_V4SF:
25149 case IX86_BUILTIN_VEC_SET_V4SI:
25150 case IX86_BUILTIN_VEC_SET_V8HI:
25151 case IX86_BUILTIN_VEC_SET_V4HI:
25152 case IX86_BUILTIN_VEC_SET_V16QI:
25153 return ix86_expand_vec_set_builtin (exp);
25155 case IX86_BUILTIN_VEC_PERM_V2DF:
25156 case IX86_BUILTIN_VEC_PERM_V4SF:
25157 case IX86_BUILTIN_VEC_PERM_V2DI:
25158 case IX86_BUILTIN_VEC_PERM_V4SI:
25159 case IX86_BUILTIN_VEC_PERM_V8HI:
25160 case IX86_BUILTIN_VEC_PERM_V16QI:
25161 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25162 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25163 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25164 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25165 case IX86_BUILTIN_VEC_PERM_V4DF:
25166 case IX86_BUILTIN_VEC_PERM_V8SF:
25167 return ix86_expand_vec_perm_builtin (exp);
25169 case IX86_BUILTIN_INFQ:
25170 case IX86_BUILTIN_HUGE_VALQ:
25172 REAL_VALUE_TYPE inf;
25173 rtx tmp;
25175 real_inf (&inf);
25176 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25178 tmp = validize_mem (force_const_mem (mode, tmp));
25181 target = gen_reg_rtx (mode);
25183 emit_move_insn (target, tmp);
25187 case IX86_BUILTIN_LLWPCB:
25188 arg0 = CALL_EXPR_ARG (exp, 0);
25189 op0 = expand_normal (arg0);
25190 icode = CODE_FOR_lwp_llwpcb;
25191 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25192 op0 = copy_to_mode_reg (Pmode, op0);
25193 emit_insn (gen_lwp_llwpcb (op0));
25196 case IX86_BUILTIN_SLWPCB:
25197 icode = CODE_FOR_lwp_slwpcb;
25198 if (!target
25199     || !insn_data[icode].operand[0].predicate (target, Pmode))
25200 target = gen_reg_rtx (Pmode);
25201 emit_insn (gen_lwp_slwpcb (target));
25208 for (i = 0, d = bdesc_special_args;
25209 i < ARRAY_SIZE (bdesc_special_args);
25211 if (d->code == fcode)
25212 return ix86_expand_special_args_builtin (d, exp, target);
25214 for (i = 0, d = bdesc_args;
25215 i < ARRAY_SIZE (bdesc_args);
25217 if (d->code == fcode)
25220 case IX86_BUILTIN_FABSQ:
25221 case IX86_BUILTIN_COPYSIGNQ:
25222 if (!TARGET_SSE2)
25223 /* Emit a normal call if SSE2 isn't available. */
25224 return expand_call (exp, target, ignore);
25226 return ix86_expand_args_builtin (d, exp, target);
25229 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25230 if (d->code == fcode)
25231 return ix86_expand_sse_comi (d, exp, target);
25233 for (i = 0, d = bdesc_pcmpestr;
25234 i < ARRAY_SIZE (bdesc_pcmpestr);
25236 if (d->code == fcode)
25237 return ix86_expand_sse_pcmpestr (d, exp, target);
25239 for (i = 0, d = bdesc_pcmpistr;
25240 i < ARRAY_SIZE (bdesc_pcmpistr);
25242 if (d->code == fcode)
25243 return ix86_expand_sse_pcmpistr (d, exp, target);
25245 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25246 if (d->code == fcode)
25247 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25248 (enum ix86_builtin_func_type)
25249 d->flag, d->comparison);
25251 gcc_unreachable ();
25254 /* Returns a function decl for a vectorized version of the builtin function
25255 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25256 if it is not available. */
25259 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25262 enum machine_mode in_mode, out_mode;
25264 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25266 if (TREE_CODE (type_out) != VECTOR_TYPE
25267 || TREE_CODE (type_in) != VECTOR_TYPE
25268 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25271 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25272 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25273 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25274 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25278 case BUILT_IN_SQRT:
25279 if (out_mode == DFmode && out_n == 2
25280 && in_mode == DFmode && in_n == 2)
25281 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25284 case BUILT_IN_SQRTF:
25285 if (out_mode == SFmode && out_n == 4
25286 && in_mode == SFmode && in_n == 4)
25287 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25290 case BUILT_IN_LRINT:
25291 if (out_mode == SImode && out_n == 4
25292 && in_mode == DFmode && in_n == 2)
25293 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25296 case BUILT_IN_LRINTF:
25297 if (out_mode == SImode && out_n == 4
25298 && in_mode == SFmode && in_n == 4)
25299 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25302 case BUILT_IN_COPYSIGN:
25303 if (out_mode == DFmode && out_n == 2
25304 && in_mode == DFmode && in_n == 2)
25305 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25308 case BUILT_IN_COPYSIGNF:
25309 if (out_mode == SFmode && out_n == 4
25310 && in_mode == SFmode && in_n == 4)
25311 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
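/* For illustration (a sketch, assuming e.g. -O3 -ffast-math -msse2):
   the vectorizer queries this hook so that a loop such as

     for (i = 0; i < n; i++)
       b[i] = sqrt (a[i]);

   over double arrays is implemented with the decl returned for
   BUILT_IN_SQRT, i.e. IX86_BUILTIN_SQRTPD, a single sqrtpd on V2DF.  */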
25318 /* Dispatch to a handler for a vectorization library. */
25319 if (ix86_veclib_handler)
25320 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25326 /* Handler for an SVML-style interface to
25327 a library with vectorized intrinsics. */
25330 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25333 tree fntype, new_fndecl, args;
25336 enum machine_mode el_mode, in_mode;
25339 /* The SVML library is suitable for unsafe math only. */
25340 if (!flag_unsafe_math_optimizations)
25343 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25344 n = TYPE_VECTOR_SUBPARTS (type_out);
25345 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25346 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25347 if (el_mode != in_mode
25355 case BUILT_IN_LOG10:
25357 case BUILT_IN_TANH:
25359 case BUILT_IN_ATAN:
25360 case BUILT_IN_ATAN2:
25361 case BUILT_IN_ATANH:
25362 case BUILT_IN_CBRT:
25363 case BUILT_IN_SINH:
25365 case BUILT_IN_ASINH:
25366 case BUILT_IN_ASIN:
25367 case BUILT_IN_COSH:
25369 case BUILT_IN_ACOSH:
25370 case BUILT_IN_ACOS:
25371 if (el_mode != DFmode || n != 2)
25375 case BUILT_IN_EXPF:
25376 case BUILT_IN_LOGF:
25377 case BUILT_IN_LOG10F:
25378 case BUILT_IN_POWF:
25379 case BUILT_IN_TANHF:
25380 case BUILT_IN_TANF:
25381 case BUILT_IN_ATANF:
25382 case BUILT_IN_ATAN2F:
25383 case BUILT_IN_ATANHF:
25384 case BUILT_IN_CBRTF:
25385 case BUILT_IN_SINHF:
25386 case BUILT_IN_SINF:
25387 case BUILT_IN_ASINHF:
25388 case BUILT_IN_ASINF:
25389 case BUILT_IN_COSHF:
25390 case BUILT_IN_COSF:
25391 case BUILT_IN_ACOSHF:
25392 case BUILT_IN_ACOSF:
25393 if (el_mode != SFmode || n != 4)
25401 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25403 if (fn == BUILT_IN_LOGF)
25404 strcpy (name, "vmlsLn4");
25405 else if (fn == BUILT_IN_LOG)
25406 strcpy (name, "vmldLn2");
25409 sprintf (name, "vmls%s", bname+10);
25410 name[strlen (name)-1] = '4';
25413 sprintf (name, "vmld%s2", bname+10);
25415 /* Convert to uppercase. */
25419 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25420 args = TREE_CHAIN (args))
25424 fntype = build_function_type_list (type_out, type_in, NULL);
25426 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25428 /* Build a function declaration for the vectorized function. */
25429 new_fndecl = build_decl (BUILTINS_LOCATION,
25430 FUNCTION_DECL, get_identifier (name), fntype);
25431 TREE_PUBLIC (new_fndecl) = 1;
25432 DECL_EXTERNAL (new_fndecl) = 1;
25433 DECL_IS_NOVOPS (new_fndecl) = 1;
25434 TREE_READONLY (new_fndecl) = 1;
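/* Worked example (illustrative): for BUILT_IN_SINF with V4SF in and
   out, bname is "__builtin_sinf" and bname+10 is "sinf", so the code
   above builds "vmlssinf", overwrites the final character with '4' to
   get "vmlssin4", and the (elided) upcasing of name[4] yields the
   SVML entry point "vmlsSin4"; BUILT_IN_SIN on V2DF yields
   "vmldSin2".  */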
25439 /* Handler for an ACML-style interface to
25440 a library with vectorized intrinsics. */
25443 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25445 char name[20] = "__vr.._";
25446 tree fntype, new_fndecl, args;
25449 enum machine_mode el_mode, in_mode;
25452 /* ACML is 64-bit only and suitable for unsafe math only, as it does
25453 not correctly support parts of IEEE arithmetic, such as denormals,
25454 with the required precision. */
25455 if (!TARGET_64BIT
25456     || !flag_unsafe_math_optimizations)
25459 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25460 n = TYPE_VECTOR_SUBPARTS (type_out);
25461 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25462 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25463 if (el_mode != in_mode
25473 case BUILT_IN_LOG2:
25474 case BUILT_IN_LOG10:
25477 if (el_mode != DFmode
25482 case BUILT_IN_SINF:
25483 case BUILT_IN_COSF:
25484 case BUILT_IN_EXPF:
25485 case BUILT_IN_POWF:
25486 case BUILT_IN_LOGF:
25487 case BUILT_IN_LOG2F:
25488 case BUILT_IN_LOG10F:
25491 if (el_mode != SFmode
25500 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25501 sprintf (name + 7, "%s", bname+10);
25504 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25505 args = TREE_CHAIN (args))
25509 fntype = build_function_type_list (type_out, type_in, NULL);
25511 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25513 /* Build a function declaration for the vectorized function. */
25514 new_fndecl = build_decl (BUILTINS_LOCATION,
25515 FUNCTION_DECL, get_identifier (name), fntype);
25516 TREE_PUBLIC (new_fndecl) = 1;
25517 DECL_EXTERNAL (new_fndecl) = 1;
25518 DECL_IS_NOVOPS (new_fndecl) = 1;
25519 TREE_READONLY (new_fndecl) = 1;
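/* Worked example (illustrative): the template is "__vr.._"; for
   BUILT_IN_SIN on V2DF the (elided) code patches the dots to 'd' and
   '2' and the sprintf above appends bname+10 ("sin"), giving the ACML
   entry point "__vrd2_sin"; BUILT_IN_SINF on V4SF gives
   "__vrs4_sinf".  */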
25525 /* Returns a decl of a function that implements conversion of an integer vector
25526 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25527 are the types involved when converting according to CODE.
25528 Return NULL_TREE if it is not available. */
25531 ix86_vectorize_builtin_conversion (unsigned int code,
25532 tree dest_type, tree src_type)
25540 switch (TYPE_MODE (src_type))
25543 switch (TYPE_MODE (dest_type))
25546 return (TYPE_UNSIGNED (src_type)
25547 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25548 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25550 return (TYPE_UNSIGNED (src_type)
25552 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25558 switch (TYPE_MODE (dest_type))
25561 return (TYPE_UNSIGNED (src_type)
25563 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25572 case FIX_TRUNC_EXPR:
25573 switch (TYPE_MODE (dest_type))
25576 switch (TYPE_MODE (src_type))
25579 return (TYPE_UNSIGNED (dest_type)
25581 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25583 return (TYPE_UNSIGNED (dest_type)
25585 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25592 switch (TYPE_MODE (src_type))
25595 return (TYPE_UNSIGNED (dest_type)
25597 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25614 /* Returns the decl of a target-specific builtin that implements the
25615 reciprocal of the function, or NULL_TREE if not available. */
25618 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25619 bool sqrt ATTRIBUTE_UNUSED)
25621 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25622 && flag_finite_math_only && !flag_trapping_math
25623 && flag_unsafe_math_optimizations))
25627 /* Machine dependent builtins. */
25630 /* Vectorized version of sqrt to rsqrt conversion. */
25631 case IX86_BUILTIN_SQRTPS_NR:
25632 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25638 /* Normal builtins. */
25641 /* Sqrt to rsqrt conversion. */
25642 case BUILT_IN_SQRTF:
25643 return ix86_builtins[IX86_BUILTIN_RSQRTF];
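/* For illustration: under -ffast-math (which implies the conditions
   tested above), a division by sqrtf (x) is rewritten to use
   IX86_BUILTIN_RSQRTF, i.e. the rsqrtss estimate; the _NR suffix on
   the vector variants marks builtins whose low-precision hardware
   estimate is refined with a Newton-Raphson step.  */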
25650 /* Helper for avx_vpermilps256_operand et al. This is also used by
25651 the expansion functions to turn the parallel back into a mask.
25652 The return value is 0 for no match and the imm8+1 for a match. */
25655 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25657 unsigned i, nelt = GET_MODE_NUNITS (mode);
25659 unsigned char ipar[8];
25661 if (XVECLEN (par, 0) != (int) nelt)
25664 /* Validate that all of the elements are constants, and not totally
25665 out of range. Copy the data into an integral array to make the
25666 subsequent checks easier. */
25667 for (i = 0; i < nelt; ++i)
25669 rtx er = XVECEXP (par, 0, i);
25670 unsigned HOST_WIDE_INT ei;
25672 if (!CONST_INT_P (er))
25683 /* In the 256-bit DFmode case, we can only move elements within
25684 a 128-bit lane. */
25685 for (i = 0; i < 2; ++i)
25689 mask |= ipar[i] << i;
25691 for (i = 2; i < 4; ++i)
25695 mask |= (ipar[i] - 2) << i;
25700 /* In the 256-bit SFmode case, we have full freedom of movement
25701 within the low 128-bit lane, but the high 128-bit lane must
25702 mirror the exact same pattern. */
25703 for (i = 0; i < 4; ++i)
25704 if (ipar[i] + 4 != ipar[i + 4])
25711 /* In the 128-bit case, we have full freedom in the placement of
25712 the elements from the source operand. */
25713 for (i = 0; i < nelt; ++i)
25714 mask |= ipar[i] << (i * (nelt / 2));
25718 gcc_unreachable ();
25721 /* Make sure success has a non-zero value by adding one. */
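/* Worked example (illustrative): for V4SF the loop above packs two
   bits per element, mask |= ipar[i] << (i * 2), so a parallel
   selecting elements (2 3 0 1) yields
   mask = 2 | 3<<2 | 0<<4 | 1<<6 = 0x4e, and the function returns
   0x4f, i.e. the vpermilps imm8 plus one.  */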
25725 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25726 the expansion functions to turn the parallel back into a mask.
25727 The return value is 0 for no match and the imm8+1 for a match. */
25730 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25732 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25734 unsigned char ipar[8];
25736 if (XVECLEN (par, 0) != (int) nelt)
25739 /* Validate that all of the elements are constants, and not totally
25740 out of range. Copy the data into an integral array to make the
25741 subsequent checks easier. */
25742 for (i = 0; i < nelt; ++i)
25744 rtx er = XVECEXP (par, 0, i);
25745 unsigned HOST_WIDE_INT ei;
25747 if (!CONST_INT_P (er))
25750 if (ei >= 2 * nelt)
25755 /* Validate that each half of the permute selects a contiguous half. */
25756 for (i = 0; i < nelt2 - 1; ++i)
25757 if (ipar[i] + 1 != ipar[i + 1])
25759 for (i = nelt2; i < nelt - 1; ++i)
25760 if (ipar[i] + 1 != ipar[i + 1])
25763 /* Reconstruct the mask. */
25764 for (i = 0; i < 2; ++i)
25766 unsigned e = ipar[i * nelt2];
25770 mask |= e << (i * 4);
25773 /* Make sure success has a non-zero value by adding one. */
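/* Worked example (illustrative): for V4DF a parallel (0 1 4 5) passes
   the half checks; the low destination half starts at element 0 of
   operand 1 (encoded as 0) and the high half at element 4, the low
   half of operand 2 (encoded as 4 / nelt2 = 2), so
   mask = 0 | 2<<4 = 0x20 and the function returns 0x21.  */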
25778 /* Store OPERAND to memory after reload is completed. This means
25779 that we can't easily use assign_stack_local. */
25781 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25785 gcc_assert (reload_completed);
25786 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25788 result = gen_rtx_MEM (mode,
25789 gen_rtx_PLUS (Pmode,
25791 GEN_INT (-RED_ZONE_SIZE)));
25792 emit_move_insn (result, operand);
25794 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25800 operand = gen_lowpart (DImode, operand);
25804 gen_rtx_SET (VOIDmode,
25805 gen_rtx_MEM (DImode,
25806 gen_rtx_PRE_DEC (DImode,
25807 stack_pointer_rtx)),
25811 gcc_unreachable ();
25813 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25822 split_di (&operand, 1, operands, operands + 1);
25824 gen_rtx_SET (VOIDmode,
25825 gen_rtx_MEM (SImode,
25826 gen_rtx_PRE_DEC (Pmode,
25827 stack_pointer_rtx)),
25830 gen_rtx_SET (VOIDmode,
25831 gen_rtx_MEM (SImode,
25832 gen_rtx_PRE_DEC (Pmode,
25833 stack_pointer_rtx)),
25838 /* Store HImodes as SImodes. */
25839 operand = gen_lowpart (SImode, operand);
25843 gen_rtx_SET (VOIDmode,
25844 gen_rtx_MEM (GET_MODE (operand),
25845 gen_rtx_PRE_DEC (SImode,
25846 stack_pointer_rtx)),
25850 gcc_unreachable ();
25852 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25857 /* Free operand from the memory. */
25859 ix86_free_from_memory (enum machine_mode mode)
25861 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25865 if (mode == DImode || TARGET_64BIT)
25869 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25870 to a pop or add instruction if registers are available. */
25871 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25872 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25877 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
25878 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
25879 same. */
25880 static const reg_class_t *
25881 i386_ira_cover_classes (void)
25883 static const reg_class_t sse_fpmath_classes[] = {
25884 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25886 static const reg_class_t no_sse_fpmath_classes[] = {
25887 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25890 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25893 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25894 QImode must go into class Q_REGS.
25895 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25896 movdf to do mem-to-mem moves through integer regs. */
25898 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25900 enum machine_mode mode = GET_MODE (x);
25902 /* We're only allowed to return a subclass of CLASS. Many of the
25903 following checks fail for NO_REGS, so eliminate that early. */
25904 if (regclass == NO_REGS)
25907 /* All classes can load zeros. */
25908 if (x == CONST0_RTX (mode))
25911 /* Force constants into memory if we are loading a (nonzero) constant into
25912 an MMX or SSE register. This is because there are no MMX/SSE instructions
25913 to load from a constant. */
25914 if (CONSTANT_P (x)
25915     && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25918 /* Prefer SSE regs only, if we can use them for math. */
25919 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25920 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25922 /* Floating-point constants need more complex checks. */
25923 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25925 /* General regs can load everything. */
25926 if (reg_class_subset_p (regclass, GENERAL_REGS))
25929 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25930 zero above. We only want to wind up preferring 80387 registers if
25931 we plan on doing computation with them. */
25932 if (TARGET_80387
25933     && standard_80387_constant_p (x))
25935 /* Limit class to non-sse. */
25936 if (regclass == FLOAT_SSE_REGS)
25938 if (regclass == FP_TOP_SSE_REGS)
25940 if (regclass == FP_SECOND_SSE_REGS)
25941 return FP_SECOND_REG;
25942 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25949 /* Generally when we see PLUS here, it's the function invariant
25950 (plus soft-fp const_int), which can only be computed into general
25951 regs. */
25952 if (GET_CODE (x) == PLUS)
25953 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25955 /* QImode constants are easy to load, but non-constant QImode data
25956 must go into Q_REGS. */
25957 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25959 if (reg_class_subset_p (regclass, Q_REGS))
25961 if (reg_class_subset_p (Q_REGS, regclass))
25969 /* Discourage putting floating-point values in SSE registers unless
25970 SSE math is being used, and likewise for the 387 registers. */
25972 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25974 enum machine_mode mode = GET_MODE (x);
25976 /* Restrict the output reload class to the register bank that we are doing
25977 math on. If we would like not to return a subset of CLASS, reject this
25978 alternative: if reload cannot do this, it will still use its choice. */
25980 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25981 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25983 if (X87_FLOAT_MODE_P (mode))
25985 if (regclass == FP_TOP_SSE_REGS)
25987 else if (regclass == FP_SECOND_SSE_REGS)
25988 return FP_SECOND_REG;
25990 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25997 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
25998 enum machine_mode mode,
25999 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26001 /* QImode spills from non-QI registers require an
26002 intermediate register on 32-bit targets. */
26003 if (!in_p && mode == QImode && !TARGET_64BIT
26004 && (rclass == GENERAL_REGS
26005 || rclass == LEGACY_REGS
26006 || rclass == INDEX_REGS))
26015 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26016 regno = true_regnum (x);
26018 /* Return Q_REGS if the operand is in memory. */
26026 /* If we are copying between general and FP registers, we need a memory
26027 location. The same is true for SSE and MMX registers.
26029 To optimize register_move_cost performance, allow inline variant.
26031 The macro can't work reliably when one of the CLASSES is a class containing
26032 registers from multiple units (SSE, MMX, integer). We avoid this by never
26033 combining those units in a single alternative in the machine description.
26034 Ensure that this constraint holds to avoid unexpected surprises.
26036 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26037 enforce these sanity checks. */
26040 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26041 enum machine_mode mode, int strict)
26043 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26044 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26045 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26046 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26047 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26048 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26050 gcc_assert (!strict);
26054 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26057 /* ??? This is a lie. We do have moves between mmx/general, and between
26058 mmx/sse2. But by saying we need secondary memory we discourage the
26059 register allocator from using the mmx registers unless needed. */
26060 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26063 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26065 /* SSE1 doesn't have any direct moves from other classes. */
26069 /* If the target says that inter-unit moves are more expensive
26070 than moving through memory, then don't generate them. */
26071 if (!TARGET_INTER_UNIT_MOVES)
26074 /* Between SSE and general, we have moves no larger than word size. */
26075 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26083 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26084 enum machine_mode mode, int strict)
26086 return inline_secondary_memory_needed (class1, class2, mode, strict);
26089 /* Return true if the registers in CLASS cannot represent the change from
26090 modes FROM to TO. */
26093 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26094 enum reg_class regclass)
26099 /* x87 registers can't do subreg at all, as all values are reformatted
26100 to extended precision. */
26101 if (MAYBE_FLOAT_CLASS_P (regclass))
26104 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26106 /* Vector registers do not support QI or HImode loads. If we don't
26107 disallow a change to these modes, reload will assume it's ok to
26108 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26109 the vec_dupv4hi pattern. */
26110 if (GET_MODE_SIZE (from) < 4)
26113 /* Vector registers do not support subreg with nonzero offsets, which
26114 are otherwise valid for integer registers. Since we can't see
26115 whether we have a nonzero offset from here, prohibit all
26116 nonparadoxical subregs changing size. */
26117 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26124 /* Return the cost of moving data of mode M between a
26125 register and memory. A value of 2 is the default; this cost is
26126 relative to those in `REGISTER_MOVE_COST'.
26128 This function is used extensively by register_move_cost, which is used to
26129 build tables at startup. Make it inline in this case.
26130 When IN is 2, return the maximum of the in and out move costs.
26132 If moving between registers and memory is more expensive than
26133 between two registers, you should define this macro to express the
26136 Also model the increased cost of moving QImode registers in
26137 non-Q_REGS classes. */
26140 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26144 if (FLOAT_CLASS_P (regclass))
26162 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26163 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26165 if (SSE_CLASS_P (regclass))
26168 switch (GET_MODE_SIZE (mode))
26183 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26184 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26186 if (MMX_CLASS_P (regclass))
26189 switch (GET_MODE_SIZE (mode))
26201 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26202 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26204 switch (GET_MODE_SIZE (mode))
26207 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26210 return ix86_cost->int_store[0];
26211 if (TARGET_PARTIAL_REG_DEPENDENCY
26212 && optimize_function_for_speed_p (cfun))
26213 cost = ix86_cost->movzbl_load;
26215 cost = ix86_cost->int_load[0];
26217 return MAX (cost, ix86_cost->int_store[0]);
26223 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26225 return ix86_cost->movzbl_load;
26227 return ix86_cost->int_store[0] + 4;
26232 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26233 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26235 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26236 if (mode == TFmode)
26239 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26241 cost = ix86_cost->int_load[2];
26243 cost = ix86_cost->int_store[2];
26244 return (cost * (((int) GET_MODE_SIZE (mode)
26245 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26250 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26253 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26257 /* Return the cost of moving data from a register in class CLASS1 to
26258 one in class CLASS2.
26260 It is not required that the cost always equal 2 when FROM is the same as TO;
26261 on some machines it is expensive to move between registers if they are not
26262 general registers. */
26265 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26266 reg_class_t class2_i)
26268 enum reg_class class1 = (enum reg_class) class1_i;
26269 enum reg_class class2 = (enum reg_class) class2_i;
26271 /* In case we require secondary memory, compute cost of the store followed
26272 by load. In order to avoid bad register allocation choices, we need
26273 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26275 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26279 cost += inline_memory_move_cost (mode, class1, 2);
26280 cost += inline_memory_move_cost (mode, class2, 2);
26282 /* When copying from a general-purpose register we may emit multiple
26283 stores followed by a single load, causing a memory size mismatch stall.
26284 Count this as an arbitrarily high cost of 20. */
26285 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26288 /* In the case of FP/MMX moves, the registers actually overlap, and we
26289 have to switch modes in order to treat them differently. */
26290 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26291 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26297 /* Moves between SSE/MMX and integer unit are expensive. */
26298 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26299 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26301 /* ??? By keeping the returned value relatively high, we limit the number
26302 of moves between integer and MMX/SSE registers for all targets.
26303 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
26304 where integer modes in MMX/SSE registers are not tieable
26305 because of missing QImode and HImode moves to, from, or between
26306 MMX/SSE registers. */
26307 return MAX (8, ix86_cost->mmxsse_to_integer);
26309 if (MAYBE_FLOAT_CLASS_P (class1))
26310 return ix86_cost->fp_move;
26311 if (MAYBE_SSE_CLASS_P (class1))
26312 return ix86_cost->sse_move;
26313 if (MAYBE_MMX_CLASS_P (class1))
26314 return ix86_cost->mmx_move;
26318 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26321 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26323 /* Flags and only flags can hold CCmode values. */
26324 if (CC_REGNO_P (regno))
26325 return GET_MODE_CLASS (mode) == MODE_CC;
26326 if (GET_MODE_CLASS (mode) == MODE_CC
26327 || GET_MODE_CLASS (mode) == MODE_RANDOM
26328 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26330 if (FP_REGNO_P (regno))
26331 return VALID_FP_MODE_P (mode);
26332 if (SSE_REGNO_P (regno))
26334 /* We implement the move patterns for all vector modes into and
26335 out of SSE registers, even when no operation instructions
26336 are available. OImode move is available only when AVX is
26337 enabled. */
26338 return ((TARGET_AVX && mode == OImode)
26339 || VALID_AVX256_REG_MODE (mode)
26340 || VALID_SSE_REG_MODE (mode)
26341 || VALID_SSE2_REG_MODE (mode)
26342 || VALID_MMX_REG_MODE (mode)
26343 || VALID_MMX_REG_MODE_3DNOW (mode));
26345 if (MMX_REGNO_P (regno))
26347 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26348 so if the register is available at all, then we can move data of
26349 the given mode into or out of it. */
26350 return (VALID_MMX_REG_MODE (mode)
26351 || VALID_MMX_REG_MODE_3DNOW (mode));
26354 if (mode == QImode)
26356 /* Take care with QImode values: they can be in non-QI regs,
26357 but then they cause partial register stalls. */
26358 if (regno <= BX_REG || TARGET_64BIT)
26360 if (!TARGET_PARTIAL_REG_STALL)
26362 return reload_in_progress || reload_completed;
26364 /* We handle both integer and floats in the general purpose registers. */
26365 else if (VALID_INT_MODE_P (mode))
26367 else if (VALID_FP_MODE_P (mode))
26369 else if (VALID_DFP_MODE_P (mode))
26371 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26372 on to use that value in smaller contexts, this can easily force a
26373 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26374 supporting DImode, allow it. */
26375 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26381 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26382 tieable integer mode. */
26385 ix86_tieable_integer_mode_p (enum machine_mode mode)
26394 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26397 return TARGET_64BIT;
26404 /* Return true if MODE1 is accessible in a register that can hold MODE2
26405 without copying. That is, all register classes that can hold MODE2
26406 can also hold MODE1. */
26409 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26411 if (mode1 == mode2)
26414 if (ix86_tieable_integer_mode_p (mode1)
26415 && ix86_tieable_integer_mode_p (mode2))
26418 /* MODE2 being XFmode implies fp stack or general regs, which means we
26419 can tie any smaller floating point modes to it. Note that we do not
26420 tie this with TFmode. */
26421 if (mode2 == XFmode)
26422 return mode1 == SFmode || mode1 == DFmode;
26424 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26425 that we can tie it with SFmode. */
26426 if (mode2 == DFmode)
26427 return mode1 == SFmode;
26429 /* If MODE2 is only appropriate for an SSE register, then tie with
26430 any other mode acceptable to SSE registers. */
26431 if (GET_MODE_SIZE (mode2) == 16
26432 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26433 return (GET_MODE_SIZE (mode1) == 16
26434 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26436 /* If MODE2 is appropriate for an MMX register, then tie
26437 with any other mode acceptable to MMX registers. */
26438 if (GET_MODE_SIZE (mode2) == 8
26439 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26440 return (GET_MODE_SIZE (mode1) == 8
26441 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26446 /* Compute a (partial) cost for rtx X. Return true if the complete
26447 cost has been computed, and false if subexpressions should be
26448 scanned. In either case, *TOTAL contains the cost result. */
26451 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26453 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26454 enum machine_mode mode = GET_MODE (x);
26455 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26463 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26465 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26467 else if (flag_pic && SYMBOLIC_CONST (x)
26468 && (!TARGET_64BIT
26469 || (GET_CODE (x) != LABEL_REF
26470 && (GET_CODE (x) != SYMBOL_REF
26471 || !SYMBOL_REF_LOCAL_P (x)))))
26478 if (mode == VOIDmode)
26481 switch (standard_80387_constant_p (x))
26486 default: /* Other constants */
26491 /* Start with (MEM (SYMBOL_REF)), since that's where
26492 it'll probably end up. Add a penalty for size. */
26493 *total = (COSTS_N_INSNS (1)
26494 + (flag_pic != 0 && !TARGET_64BIT)
26495 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26501 /* Zero extension is often completely free on x86_64, so make
26502 it as cheap as possible. */
26503 if (TARGET_64BIT && mode == DImode
26504 && GET_MODE (XEXP (x, 0)) == SImode)
26506 else if (TARGET_ZERO_EXTEND_WITH_AND)
26507 *total = cost->add;
26509 *total = cost->movzx;
26513 *total = cost->movsx;
26517 if (CONST_INT_P (XEXP (x, 1))
26518 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26520 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26523 *total = cost->add;
26526 if ((value == 2 || value == 3)
26527 && cost->lea <= cost->shift_const)
26529 *total = cost->lea;
26539 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26541 if (CONST_INT_P (XEXP (x, 1)))
26543 if (INTVAL (XEXP (x, 1)) > 32)
26544 *total = cost->shift_const + COSTS_N_INSNS (2);
26546 *total = cost->shift_const * 2;
26550 if (GET_CODE (XEXP (x, 1)) == AND)
26551 *total = cost->shift_var * 2;
26553 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26558 if (CONST_INT_P (XEXP (x, 1)))
26559 *total = cost->shift_const;
26561 *total = cost->shift_var;
26566 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26568 /* ??? SSE scalar cost should be used here. */
26569 *total = cost->fmul;
26572 else if (X87_FLOAT_MODE_P (mode))
26574 *total = cost->fmul;
26577 else if (FLOAT_MODE_P (mode))
26579 /* ??? SSE vector cost should be used here. */
26580 *total = cost->fmul;
26585 rtx op0 = XEXP (x, 0);
26586 rtx op1 = XEXP (x, 1);
26588 if (CONST_INT_P (XEXP (x, 1)))
26590 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26591 for (nbits = 0; value != 0; value &= value - 1)
26595 /* This is arbitrary. */
26598 /* Compute costs correctly for widening multiplication. */
26599 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26600 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26601 == GET_MODE_SIZE (mode))
26603 int is_mulwiden = 0;
26604 enum machine_mode inner_mode = GET_MODE (op0);
26606 if (GET_CODE (op0) == GET_CODE (op1))
26607 is_mulwiden = 1, op1 = XEXP (op1, 0);
26608 else if (CONST_INT_P (op1))
26610 if (GET_CODE (op0) == SIGN_EXTEND)
26611 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26614 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26618 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26621 *total = (cost->mult_init[MODE_INDEX (mode)]
26622 + nbits * cost->mult_bit
26623 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26632 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26633 /* ??? SSE cost should be used here. */
26634 *total = cost->fdiv;
26635 else if (X87_FLOAT_MODE_P (mode))
26636 *total = cost->fdiv;
26637 else if (FLOAT_MODE_P (mode))
26638 /* ??? SSE vector cost should be used here. */
26639 *total = cost->fdiv;
26641 *total = cost->divide[MODE_INDEX (mode)];
26645 if (GET_MODE_CLASS (mode) == MODE_INT
26646 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26648 if (GET_CODE (XEXP (x, 0)) == PLUS
26649 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26650 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26651 && CONSTANT_P (XEXP (x, 1)))
26653 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26654 if (val == 2 || val == 4 || val == 8)
26656 *total = cost->lea;
26657 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26658 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26659 outer_code, speed);
26660 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26664 else if (GET_CODE (XEXP (x, 0)) == MULT
26665 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26667 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26668 if (val == 2 || val == 4 || val == 8)
26670 *total = cost->lea;
26671 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26672 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26676 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26678 *total = cost->lea;
26679 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26680 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26681 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26688 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26690 /* ??? SSE cost should be used here. */
26691 *total = cost->fadd;
26694 else if (X87_FLOAT_MODE_P (mode))
26696 *total = cost->fadd;
26699 else if (FLOAT_MODE_P (mode))
26701 /* ??? SSE vector cost should be used here. */
26702 *total = cost->fadd;
26710 if (!TARGET_64BIT && mode == DImode)
26712 *total = (cost->add * 2
26713 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26714 << (GET_MODE (XEXP (x, 0)) != DImode))
26715 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26716 << (GET_MODE (XEXP (x, 1)) != DImode)));
26722 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26724 /* ??? SSE cost should be used here. */
26725 *total = cost->fchs;
26728 else if (X87_FLOAT_MODE_P (mode))
26730 *total = cost->fchs;
26733 else if (FLOAT_MODE_P (mode))
26735 /* ??? SSE vector cost should be used here. */
26736 *total = cost->fchs;
26742 if (!TARGET_64BIT && mode == DImode)
26743 *total = cost->add * 2;
26745 *total = cost->add;
26749 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26750 && XEXP (XEXP (x, 0), 1) == const1_rtx
26751 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26752 && XEXP (x, 1) == const0_rtx)
26754 /* This kind of construct is implemented using test[bwl].
26755 Treat it as if we had an AND. */
26756 *total = (cost->add
26757 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26758 + rtx_cost (const1_rtx, outer_code, speed));
26764 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26769 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26770 /* ??? SSE cost should be used here. */
26771 *total = cost->fabs;
26772 else if (X87_FLOAT_MODE_P (mode))
26773 *total = cost->fabs;
26774 else if (FLOAT_MODE_P (mode))
26775 /* ??? SSE vector cost should be used here. */
26776 *total = cost->fabs;
26780 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26781 /* ??? SSE cost should be used here. */
26782 *total = cost->fsqrt;
26783 else if (X87_FLOAT_MODE_P (mode))
26784 *total = cost->fsqrt;
26785 else if (FLOAT_MODE_P (mode))
26786 /* ??? SSE vector cost should be used here. */
26787 *total = cost->fsqrt;
26791 if (XINT (x, 1) == UNSPEC_TP)
26798 case VEC_DUPLICATE:
26799 /* ??? Assume all of these vector manipulation patterns are
26800 recognizable, in which case they all pretty much have the
26801 same cost. */
26802 *total = COSTS_N_INSNS (1);
26812 static int current_machopic_label_num;
26814 /* Given a symbol name and its associated stub, write out the
26815 definition of the stub. */
26818 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26820 unsigned int length;
26821 char *binder_name, *symbol_name, lazy_ptr_name[32];
26822 int label = ++current_machopic_label_num;
26824 /* For 64-bit we shouldn't get here. */
26825 gcc_assert (!TARGET_64BIT);
26827 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26828 symb = targetm.strip_name_encoding (symb);
26830 length = strlen (stub);
26831 binder_name = XALLOCAVEC (char, length + 32);
26832 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26834 length = strlen (symb);
26835 symbol_name = XALLOCAVEC (char, length + 32);
26836 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26838 sprintf (lazy_ptr_name, "L%d$lz", label);
26841 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26843 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26845 fprintf (file, "%s:\n", stub);
26846 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26850 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26851 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26852 fprintf (file, "\tjmp\t*%%edx\n");
26855 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26857 fprintf (file, "%s:\n", binder_name);
26861 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26862 fputs ("\tpushl\t%eax\n", file);
26865 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26867 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26869 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26870 fprintf (file, "%s:\n", lazy_ptr_name);
26871 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26872 fprintf (file, ASM_LONG "%s\n", binder_name);
26874 #endif /* TARGET_MACHO */
26876 /* Order the registers for the register allocator. */
26879 x86_order_regs_for_local_alloc (void)
26884 /* First allocate the local general purpose registers. */
26885 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26886 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26887 reg_alloc_order [pos++] = i;
26889 /* Global general purpose registers. */
26890 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26891 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26892 reg_alloc_order [pos++] = i;
26894 /* x87 registers come first in case we are doing FP math
26895 using them. */
26896 if (!TARGET_SSE_MATH)
26897 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26898 reg_alloc_order [pos++] = i;
26900 /* SSE registers. */
26901 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26902 reg_alloc_order [pos++] = i;
26903 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26904 reg_alloc_order [pos++] = i;
26906 /* x87 registers. */
26907 if (TARGET_SSE_MATH)
26908 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26909 reg_alloc_order [pos++] = i;
26911 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26912 reg_alloc_order [pos++] = i;
26914 /* Initialize the rest of the array, as we do not allocate some registers
26915 at all. */
26916 while (pos < FIRST_PSEUDO_REGISTER)
26917 reg_alloc_order [pos++] = 0;
26920 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26921 struct attribute_spec.handler. */
26923 ix86_handle_abi_attribute (tree *node, tree name,
26924 tree args ATTRIBUTE_UNUSED,
26925 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26927 if (TREE_CODE (*node) != FUNCTION_TYPE
26928 && TREE_CODE (*node) != METHOD_TYPE
26929 && TREE_CODE (*node) != FIELD_DECL
26930 && TREE_CODE (*node) != TYPE_DECL)
26932 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26934 *no_add_attrs = true;
26939 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26941 *no_add_attrs = true;
26945 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26946 if (is_attribute_p ("ms_abi", name))
26948 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26950 error ("ms_abi and sysv_abi attributes are not compatible");
26955 else if (is_attribute_p ("sysv_abi", name))
26957 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26959 error ("ms_abi and sysv_abi attributes are not compatible");
26968 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26969 struct attribute_spec.handler. */
26971 ix86_handle_struct_attribute (tree *node, tree name,
26972 tree args ATTRIBUTE_UNUSED,
26973 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26976 if (DECL_P (*node))
26978 if (TREE_CODE (*node) == TYPE_DECL)
26979 type = &TREE_TYPE (*node);
26984 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26985 || TREE_CODE (*type) == UNION_TYPE)))
26987 warning (OPT_Wattributes, "%qE attribute ignored",
26989 *no_add_attrs = true;
26992 else if ((is_attribute_p ("ms_struct", name)
26993 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26994 || ((is_attribute_p ("gcc_struct", name)
26995 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26997 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26999 *no_add_attrs = true;
27006 ix86_handle_fndecl_attribute (tree *node, tree name,
27007 tree args ATTRIBUTE_UNUSED,
27008 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27010 if (TREE_CODE (*node) != FUNCTION_DECL)
27012 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27014 *no_add_attrs = true;
27018 #ifndef HAVE_AS_IX86_SWAP
27020 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
27027 ix86_ms_bitfield_layout_p (const_tree record_type)
27029 return (TARGET_MS_BITFIELD_LAYOUT &&
27030 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27031 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27034 /* Returns an expression indicating where the this parameter is
27035 located on entry to the FUNCTION. */
27038 x86_this_parameter (tree function)
27040 tree type = TREE_TYPE (function);
27041 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27046 const int *parm_regs;
27048 if (ix86_function_type_abi (type) == MS_ABI)
27049 parm_regs = x86_64_ms_abi_int_parameter_registers;
27051 parm_regs = x86_64_int_parameter_registers;
27052 return gen_rtx_REG (DImode, parm_regs[aggr]);
27055 nregs = ix86_function_regparm (type, function);
27057 if (nregs > 0 && !stdarg_p (type))
27061 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27062 regno = aggr ? DX_REG : CX_REG;
27063 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27067 return gen_rtx_MEM (SImode,
27068 plus_constant (stack_pointer_rtx, 4));
27077 return gen_rtx_MEM (SImode,
27078 plus_constant (stack_pointer_rtx, 4));
27081 return gen_rtx_REG (SImode, regno);
27084 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27087 /* Determine whether x86_output_mi_thunk can succeed. */
27090 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27091 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27092 HOST_WIDE_INT vcall_offset, const_tree function)
27094 /* 64-bit can handle anything. */
27098 /* For 32-bit, everything's fine if we have one free register. */
27099 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27102 /* Need a free register for vcall_offset. */
27106 /* Need a free register for GOT references. */
27107 if (flag_pic && !targetm.binds_local_p (function))
27110 /* Otherwise ok. */
27114 /* Output the assembler code for a thunk function. THUNK_DECL is the
27115 declaration for the thunk function itself, FUNCTION is the decl for
27116 the target function. DELTA is an immediate constant offset to be
27117 added to THIS. If VCALL_OFFSET is nonzero, the word at
27118 *(*this + vcall_offset) should be added to THIS. */
27121 x86_output_mi_thunk (FILE *file,
27122 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27123 HOST_WIDE_INT vcall_offset, tree function)
27126 rtx this_param = x86_this_parameter (function);
27129 /* Make sure unwind info is emitted for the thunk if needed. */
27130 final_start_function (emit_barrier (), file, 1);
27132 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27133 pull it in now and let DELTA benefit. */
27134 if (REG_P (this_param))
27135 this_reg = this_param;
27136 else if (vcall_offset)
27138 /* Put the this parameter into %eax. */
27139 xops[0] = this_param;
27140 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27141 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27144 this_reg = NULL_RTX;
27146 /* Adjust the this parameter by a fixed constant. */
27149 xops[0] = GEN_INT (delta);
27150 xops[1] = this_reg ? this_reg : this_param;
27153 if (!x86_64_general_operand (xops[0], DImode))
27155 tmp = gen_rtx_REG (DImode, R10_REG);
27157 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27159 xops[1] = this_param;
27161 if (x86_maybe_negate_const_int (&xops[0], DImode))
27162 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27164 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27166 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27167 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27169 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27172 /* Adjust the this parameter by a value stored in the vtable. */
27176 tmp = gen_rtx_REG (DImode, R10_REG);
27179 int tmp_regno = CX_REG;
27180 if (lookup_attribute ("fastcall",
27181 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27182 || lookup_attribute ("thiscall",
27183 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27184 tmp_regno = AX_REG;
27185 tmp = gen_rtx_REG (SImode, tmp_regno);
27188 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27190 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27192 /* Adjust the this parameter. */
27193 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27194 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27196 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27197 xops[0] = GEN_INT (vcall_offset);
27199 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27200 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27202 xops[1] = this_reg;
27203 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27206 /* If necessary, drop THIS back to its stack slot. */
27207 if (this_reg && this_reg != this_param)
27209 xops[0] = this_reg;
27210 xops[1] = this_param;
27211 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27214 xops[0] = XEXP (DECL_RTL (function), 0);
27217 if (!flag_pic || targetm.binds_local_p (function))
27218 output_asm_insn ("jmp\t%P0", xops);
27219 /* All thunks should be in the same object as their target,
27220 and thus binds_local_p should be true. */
27221 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27222 gcc_unreachable ();
27225 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27226 tmp = gen_rtx_CONST (Pmode, tmp);
27227 tmp = gen_rtx_MEM (QImode, tmp);
27229 output_asm_insn ("jmp\t%A0", xops);
27234 if (!flag_pic || targetm.binds_local_p (function))
27235 output_asm_insn ("jmp\t%P0", xops);
27240 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27241 if (TARGET_MACHO_BRANCH_ISLANDS)
27242 sym_ref = (gen_rtx_SYMBOL_REF
27244 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27245 tmp = gen_rtx_MEM (QImode, sym_ref);
27247 output_asm_insn ("jmp\t%0", xops);
27250 #endif /* TARGET_MACHO */
27252 tmp = gen_rtx_REG (SImode, CX_REG);
27253 output_set_got (tmp, NULL_RTX);
27256 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27257 output_asm_insn ("jmp\t{*}%1", xops);
27260 final_end_function ();
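/* Worked example (illustrative): for a 32-bit, non-PIC thunk with
   DELTA == -4 and VCALL_OFFSET == 0, the code above prints roughly

       subl $4, 4(%esp)
       jmp  function

   x86_maybe_negate_const_int having turned the addl of -4 into a
   subl of 4.  */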
27264 x86_file_start (void)
27266 default_file_start ();
27268 darwin_file_start ();
27270 if (X86_FILE_START_VERSION_DIRECTIVE)
27271 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27272 if (X86_FILE_START_FLTUSED)
27273 fputs ("\t.global\t__fltused\n", asm_out_file);
27274 if (ix86_asm_dialect == ASM_INTEL)
27275 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27279 x86_field_alignment (tree field, int computed)
27281 enum machine_mode mode;
27282 tree type = TREE_TYPE (field);
27284 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27286 mode = TYPE_MODE (strip_array_types (type));
27287 if (mode == DFmode || mode == DCmode
27288 || GET_MODE_CLASS (mode) == MODE_INT
27289 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27290 return MIN (32, computed);
27294 /* Output assembler code to FILE to increment profiler label # LABELNO
27295 for profiling a function entry. */
27297 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27301 #ifndef NO_PROFILE_COUNTERS
27302 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27305 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27306 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
27308 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27312 #ifndef NO_PROFILE_COUNTERS
27313 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27316 fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
27320 #ifndef NO_PROFILE_COUNTERS
27321 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27324 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27328 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27329 /* We don't have exact information about the insn sizes, but we may assume
27330 quite safely that we are informed about all 1 byte insns and memory
27331 address sizes. This is enough to eliminate unnecessary padding in
27332 most cases. */
27335 min_insn_size (rtx insn)
27339 if (!INSN_P (insn) || !active_insn_p (insn))
27342 /* Discard alignments we've emitted and jump instructions. */
27343 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27344 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27346 if (JUMP_TABLE_DATA_P (insn))
27349 /* Important case - calls are always 5 bytes.
27350 It is common to have many calls in a row. */
27352 && symbolic_reference_mentioned_p (PATTERN (insn))
27353 && !SIBLING_CALL_P (insn))
27355 len = get_attr_length (insn);
27359 /* For normal instructions we rely on get_attr_length being exact,
27360 with a few exceptions. */
27361 if (!JUMP_P (insn))
27363 enum attr_type type = get_attr_type (insn);
27368 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27369 || asm_noperands (PATTERN (insn)) >= 0)
27376 /* Otherwise trust get_attr_length. */
27380 l = get_attr_length_address (insn);
27381 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27390 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27391 16-byte window. */
27394 ix86_avoid_jump_mispredicts (void)
27396 rtx insn, start = get_insns ();
27397 int nbytes = 0, njumps = 0;
27400 /* Look for all minimal intervals of instructions containing 4 jumps.
27401 The intervals are bounded by START and INSN. NBYTES is the total
27402 size of instructions in the interval including INSN and not including
27403 START. When NBYTES is smaller than 16, it is possible that the
27404 end of START and INSN ends up in the same 16-byte page.
27406 The smallest offset in the page at which INSN can start is the case
27407 where START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
27408 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN).
27410 for (insn = start; insn; insn = NEXT_INSN (insn))
27414 if (LABEL_P (insn))
27416 int align = label_to_alignment (insn);
27417 int max_skip = label_to_max_skip (insn);
27421 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27422 already in the current 16 byte page, because otherwise
27423 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27424 bytes to reach 16 byte boundary. */
27426 || (align <= 3 && max_skip != (1 << align) - 1))
27429 fprintf (dump_file, "Label %i with max_skip %i\n",
27430 INSN_UID (insn), max_skip);
27433 while (nbytes + max_skip >= 16)
27435 start = NEXT_INSN (start);
27436 if ((JUMP_P (start)
27437 && GET_CODE (PATTERN (start)) != ADDR_VEC
27438 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27439 || CALL_P (start))
27440 njumps--, isjump = 1;
27443 nbytes -= min_insn_size (start);
27449 min_size = min_insn_size (insn);
27450 nbytes += min_size;
27452 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27453 INSN_UID (insn), min_size);
27455 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27456 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27464 start = NEXT_INSN (start);
27465 if ((JUMP_P (start)
27466 && GET_CODE (PATTERN (start)) != ADDR_VEC
27467 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27468 || CALL_P (start))
27469 njumps--, isjump = 1;
27472 nbytes -= min_insn_size (start);
27474 gcc_assert (njumps >= 0);
27476 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27477 INSN_UID (start), INSN_UID (insn), nbytes);
27479 if (njumps == 3 && isjump && nbytes < 16)
27481 int padsize = 15 - nbytes + min_insn_size (insn);
27484 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27485 INSN_UID (insn), padsize);
27486 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
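/* Worked example (illustrative): suppose the trimmed interval still
   contains three jumps, INSN is a fourth, 2-byte jump, and
   nbytes == 10.  Then padsize = 15 - 10 + 2 = 7, and the gen_pad
   emitted above inserts up to 7 bytes so that at most three of the
   jumps can land in any one 16-byte window.  */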
27492 /* AMD Athlon works faster
27493 when RET is not the destination of a conditional jump and is not directly
27494 preceded by another jump instruction. We avoid the penalty by inserting a
27495 NOP just before such RET instructions. */
27497 ix86_pad_returns (void)
27502 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27504 basic_block bb = e->src;
27505 rtx ret = BB_END (bb);
27507 bool replace = false;
27509 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27510 || optimize_bb_for_size_p (bb))
27512 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27513 if (active_insn_p (prev) || LABEL_P (prev))
27515 if (prev && LABEL_P (prev))
27520 FOR_EACH_EDGE (e, ei, bb->preds)
27521 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27522 && !(e->flags & EDGE_FALLTHRU))
27527 prev = prev_active_insn (ret);
27529 && ((JUMP_P (prev) && any_condjump_p (prev))
27532 /* Empty functions get a branch mispredict even when the jump destination
27533 is not visible to us. */
27534 if (!prev && !optimize_function_for_size_p (cfun))
27539 emit_jump_insn_before (gen_return_internal_long (), ret);
27545 /* Implement machine specific optimizations. We implement padding of returns
27546 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27550 if (optimize && optimize_function_for_speed_p (cfun))
27552 if (TARGET_PAD_RETURNS)
27553 ix86_pad_returns ();
27554 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27555 if (TARGET_FOUR_JUMP_LIMIT)
27556 ix86_avoid_jump_mispredicts ();
27561 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
27564 x86_extended_QIreg_mentioned_p (rtx insn)
27567 extract_insn_cached (insn);
27568 for (i = 0; i < recog_data.n_operands; i++)
27569 if (REG_P (recog_data.operand[i])
27570 && REGNO (recog_data.operand[i]) > BX_REG)
27575 /* Return nonzero when P points to a register encoded via a REX prefix.
27576 Called via for_each_rtx. */
27578 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27580 unsigned int regno;
27583 regno = REGNO (*p);
27584 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27587 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
27590 x86_extended_reg_mentioned_p (rtx insn)
27592 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27593 extended_reg_mentioned_1, NULL);
27596 /* If profitable, negate (without causing overflow) integer constant
27597 of mode MODE at location LOC. Return true in this case. */
27599 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27603 if (!CONST_INT_P (*loc))
27609 /* DImode x86_64 constants must fit in 32 bits. */
27610 gcc_assert (x86_64_immediate_operand (*loc, mode));
27621 gcc_unreachable ();
27624 /* Avoid overflows. */
27625 if (mode_signbit_p (mode, *loc))
27628 val = INTVAL (*loc);
27630 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
27631 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
27632 if ((val < 0 && val != -128)
27635 *loc = GEN_INT (-val);
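/* Hedged illustration (not part of this file's logic): the sign-flip
   heuristic above in plain C.  -128 is excluded because it fits a signed
   8-bit immediate while +128 would need a 32-bit one.  */
#if 0
static int
prefer_negated_immediate (long val)
{
  /* E.g. prefer `subl $4,%eax' over `addl $-4,%eax', but keep -128.  */
  return val < 0 && val != -128;
}
#endif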
27642 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27643 optabs would emit if we didn't have TFmode patterns. */
27646 x86_emit_floatuns (rtx operands[2])
27648 rtx neglab, donelab, i0, i1, f0, in, out;
27649 enum machine_mode mode, inmode;
27651 inmode = GET_MODE (operands[1]);
27652 gcc_assert (inmode == SImode || inmode == DImode);
27655 in = force_reg (inmode, operands[1]);
27656 mode = GET_MODE (out);
27657 neglab = gen_label_rtx ();
27658 donelab = gen_label_rtx ();
27659 f0 = gen_reg_rtx (mode);
27661 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27663 expand_float (out, in, 0);
27665 emit_jump_insn (gen_jump (donelab));
27668 emit_label (neglab);
27670 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27672 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27674 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27676 expand_float (f0, i0, 0);
27678 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27680 emit_label (donelab);
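/* Hedged illustration (not part of this file's logic): scalar equivalent
   of the unsigned-to-float expansion above, assuming a 64-bit long.  For
   inputs with the sign bit set, halve the value while ORing the low bit
   back in (so the final rounding is still correct), convert as signed,
   then double the result.  */
#if 0
static double
floatuns_sketch (unsigned long u)
{
  if ((long) u >= 0)
    return (double) (long) u;		/* Fits in a signed conversion.  */
  unsigned long half = (u >> 1) | (u & 1);
  double f = (double) (long) half;
  return f + f;
}
#endif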
27683 /* AVX does not support 32-byte integer vector operations,
27684 thus the longest vector we are faced with is V16QImode. */
27685 #define MAX_VECT_LEN 16
27687 struct expand_vec_perm_d
27689 rtx target, op0, op1;
27690 unsigned char perm[MAX_VECT_LEN];
27691 enum machine_mode vmode;
27692 unsigned char nelt;
27696 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27697 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27699 /* Get a vector mode of the same size as the original but with elements
27700 twice as wide. This is only guaranteed to apply to integral vectors. */
27702 static inline enum machine_mode
27703 get_mode_wider_vector (enum machine_mode o)
27705 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27706 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27707 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27708 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27712 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27713 with all elements equal to VAR. Return true if successful. */
27716 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27717 rtx target, rtx val)
27740 /* First attempt to recognize VAL as-is. */
27741 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27742 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27743 if (recog_memoized (insn) < 0)
27746 /* If that fails, force VAL into a register. */
27749 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27750 seq = get_insns ();
27753 emit_insn_before (seq, insn);
27755 ok = recog_memoized (insn) >= 0;
27764 if (TARGET_SSE || TARGET_3DNOW_A)
27768 val = gen_lowpart (SImode, val);
27769 x = gen_rtx_TRUNCATE (HImode, val);
27770 x = gen_rtx_VEC_DUPLICATE (mode, x);
27771 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27784 struct expand_vec_perm_d dperm;
27788 memset (&dperm, 0, sizeof (dperm));
27789 dperm.target = target;
27790 dperm.vmode = mode;
27791 dperm.nelt = GET_MODE_NUNITS (mode);
27792 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27794 /* Extend to SImode using a paradoxical SUBREG. */
27795 tmp1 = gen_reg_rtx (SImode);
27796 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27798 /* Insert the SImode value as low element of a V4SImode vector. */
27799 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27800 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27802 ok = (expand_vec_perm_1 (&dperm)
27803 || expand_vec_perm_broadcast_1 (&dperm));
27815 /* Replicate the value once into the next wider mode and recurse. */
27817 enum machine_mode smode, wsmode, wvmode;
27820 smode = GET_MODE_INNER (mode);
27821 wvmode = get_mode_wider_vector (mode);
27822 wsmode = GET_MODE_INNER (wvmode);
27824 val = convert_modes (wsmode, smode, val, true);
27825 x = expand_simple_binop (wsmode, ASHIFT, val,
27826 GEN_INT (GET_MODE_BITSIZE (smode)),
27827 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27828 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27830 x = gen_lowpart (wvmode, target);
27831 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
27839 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
27840 rtx x = gen_reg_rtx (hvmode);
27842 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
27845 x = gen_rtx_VEC_CONCAT (mode, x, x);
27846 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27855 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27856 whose ONE_VAR element is VAR, and the other elements are zero. Return true if successful. */
27860 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27861 rtx target, rtx var, int one_var)
27863 enum machine_mode vsimode;
27866 bool use_vector_set = false;
27871 /* For SSE4.1, we normally use vector set. But if the second
27872 element is zero and inter-unit moves are OK, we use movq instead. */
27874 use_vector_set = (TARGET_64BIT
27876 && !(TARGET_INTER_UNIT_MOVES
27882 use_vector_set = TARGET_SSE4_1;
27885 use_vector_set = TARGET_SSE2;
27888 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27895 use_vector_set = TARGET_AVX;
27898 /* Use ix86_expand_vector_set in 64bit mode only. */
27899 use_vector_set = TARGET_AVX && TARGET_64BIT;
27905 if (use_vector_set)
27907 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27908 var = force_reg (GET_MODE_INNER (mode), var);
27909 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27925 var = force_reg (GET_MODE_INNER (mode), var);
27926 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27927 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27932 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27933 new_target = gen_reg_rtx (mode);
27935 new_target = target;
27936 var = force_reg (GET_MODE_INNER (mode), var);
27937 x = gen_rtx_VEC_DUPLICATE (mode, var);
27938 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27939 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27942 /* We need to shuffle the value to the correct position, so
27943 create a new pseudo to store the intermediate result. */
27945 /* With SSE2, we can use the integer shuffle insns. */
27946 if (mode != V4SFmode && TARGET_SSE2)
27948 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27950 GEN_INT (one_var == 1 ? 0 : 1),
27951 GEN_INT (one_var == 2 ? 0 : 1),
27952 GEN_INT (one_var == 3 ? 0 : 1)));
27953 if (target != new_target)
27954 emit_move_insn (target, new_target);
27958 /* Otherwise convert the intermediate result to V4SFmode and
27959 use the SSE1 shuffle instructions. */
27960 if (mode != V4SFmode)
27962 tmp = gen_reg_rtx (V4SFmode);
27963 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27968 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27970 GEN_INT (one_var == 1 ? 0 : 1),
27971 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27972 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27974 if (mode != V4SFmode)
27975 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27976 else if (tmp != target)
27977 emit_move_insn (target, tmp);
27979 else if (target != new_target)
27980 emit_move_insn (target, new_target);
27985 vsimode = V4SImode;
27991 vsimode = V2SImode;
27997 /* Zero extend the variable element to SImode and recurse. */
27998 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28000 x = gen_reg_rtx (vsimode);
28001 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28003 gcc_unreachable ();
28005 emit_move_insn (target, gen_lowpart (mode, x));
28013 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28014 consisting of the values in VALS. It is known that all elements
28015 except ONE_VAR are constants. Return true if successful. */
28018 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28019 rtx target, rtx vals, int one_var)
28021 rtx var = XVECEXP (vals, 0, one_var);
28022 enum machine_mode wmode;
28025 const_vec = copy_rtx (vals);
28026 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28027 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28035 /* For the two element vectors, it's just as easy to use
28036 the general case. */
28040 /* Use ix86_expand_vector_set in 64bit mode only. */
28063 /* There's no way to set one QImode entry easily. Combine
28064 the variable value with its adjacent constant value, and
28065 promote to an HImode set. */
28066 x = XVECEXP (vals, 0, one_var ^ 1);
28069 var = convert_modes (HImode, QImode, var, true);
28070 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28071 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28072 x = GEN_INT (INTVAL (x) & 0xff);
28076 var = convert_modes (HImode, QImode, var, true);
28077 x = gen_int_mode (INTVAL (x) << 8, HImode);
28079 if (x != const0_rtx)
28080 var = expand_simple_binop (HImode, IOR, var, x, var,
28081 1, OPTAB_LIB_WIDEN);
28083 x = gen_reg_rtx (wmode);
28084 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28085 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28087 emit_move_insn (target, gen_lowpart (mode, x));
28094 emit_move_insn (target, const_vec);
28095 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28099 /* A subroutine of ix86_expand_vector_init_general. Use vector
28100 concatenate to handle the most general case: all values variable,
28101 and none identical. */
28104 ix86_expand_vector_init_concat (enum machine_mode mode,
28105 rtx target, rtx *ops, int n)
28107 enum machine_mode cmode, hmode = VOIDmode;
28108 rtx first[8], second[4];
28148 gcc_unreachable ();
28151 if (!register_operand (ops[1], cmode))
28152 ops[1] = force_reg (cmode, ops[1]);
28153 if (!register_operand (ops[0], cmode))
28154 ops[0] = force_reg (cmode, ops[0]);
28155 emit_insn (gen_rtx_SET (VOIDmode, target,
28156 gen_rtx_VEC_CONCAT (mode, ops[0],
28176 gcc_unreachable ();
28192 gcc_unreachable ();
28197 /* FIXME: We process inputs backward to help RA. PR 36222. */
28200 for (; i > 0; i -= 2, j--)
28202 first[j] = gen_reg_rtx (cmode);
28203 v = gen_rtvec (2, ops[i - 1], ops[i]);
28204 ix86_expand_vector_init (false, first[j],
28205 gen_rtx_PARALLEL (cmode, v));
28211 gcc_assert (hmode != VOIDmode);
28212 for (i = j = 0; i < n; i += 2, j++)
28214 second[j] = gen_reg_rtx (hmode);
28215 ix86_expand_vector_init_concat (hmode, second [j],
28219 ix86_expand_vector_init_concat (mode, target, second, n);
28222 ix86_expand_vector_init_concat (mode, target, first, n);
28226 gcc_unreachable ();
28230 /* A subroutine of ix86_expand_vector_init_general. Use vector
28231 interleave to handle the most general case: all values variable,
28232 and none identical. */
28235 ix86_expand_vector_init_interleave (enum machine_mode mode,
28236 rtx target, rtx *ops, int n)
28238 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28241 rtx (*gen_load_even) (rtx, rtx, rtx);
28242 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28243 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28248 gen_load_even = gen_vec_setv8hi;
28249 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28250 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28251 inner_mode = HImode;
28252 first_imode = V4SImode;
28253 second_imode = V2DImode;
28254 third_imode = VOIDmode;
28257 gen_load_even = gen_vec_setv16qi;
28258 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28259 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28260 inner_mode = QImode;
28261 first_imode = V8HImode;
28262 second_imode = V4SImode;
28263 third_imode = V2DImode;
28266 gcc_unreachable ();
28269 for (i = 0; i < n; i++)
27271 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28272 op0 = gen_reg_rtx (SImode);
28273 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28275 /* Insert the SImode value as low element of V4SImode vector. */
28276 op1 = gen_reg_rtx (V4SImode);
28277 op0 = gen_rtx_VEC_MERGE (V4SImode,
28278 gen_rtx_VEC_DUPLICATE (V4SImode,
28280 CONST0_RTX (V4SImode),
28282 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27284 /* Cast the V4SImode vector back to a vector in the original mode. */
28285 op0 = gen_reg_rtx (mode);
28286 emit_move_insn (op0, gen_lowpart (mode, op1));
27288 /* Load even elements into the second position. */
28289 emit_insn (gen_load_even (op0,
28290 force_reg (inner_mode,
28294 /* Cast vector to FIRST_IMODE vector. */
28295 ops[i] = gen_reg_rtx (first_imode);
28296 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28299 /* Interleave low FIRST_IMODE vectors. */
28300 for (i = j = 0; i < n; i += 2, j++)
28302 op0 = gen_reg_rtx (first_imode);
28303 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28305 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28306 ops[j] = gen_reg_rtx (second_imode);
28307 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28310 /* Interleave low SECOND_IMODE vectors. */
28311 switch (second_imode)
28314 for (i = j = 0; i < n / 2; i += 2, j++)
28316 op0 = gen_reg_rtx (second_imode);
28317 emit_insn (gen_interleave_second_low (op0, ops[i],
27320 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
28322 ops[j] = gen_reg_rtx (third_imode);
28323 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28325 second_imode = V2DImode;
28326 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28330 op0 = gen_reg_rtx (second_imode);
28331 emit_insn (gen_interleave_second_low (op0, ops[0],
27334 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
28336 emit_insn (gen_rtx_SET (VOIDmode, target,
28337 gen_lowpart (mode, op0)));
28341 gcc_unreachable ();
28345 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28346 all values variable, and none identical. */
28349 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28350 rtx target, rtx vals)
28352 rtx ops[32], op0, op1;
28353 enum machine_mode half_mode = VOIDmode;
28360 if (!mmx_ok && !TARGET_SSE)
28372 n = GET_MODE_NUNITS (mode);
28373 for (i = 0; i < n; i++)
28374 ops[i] = XVECEXP (vals, 0, i);
28375 ix86_expand_vector_init_concat (mode, target, ops, n);
28379 half_mode = V16QImode;
28383 half_mode = V8HImode;
28387 n = GET_MODE_NUNITS (mode);
28388 for (i = 0; i < n; i++)
28389 ops[i] = XVECEXP (vals, 0, i);
28390 op0 = gen_reg_rtx (half_mode);
28391 op1 = gen_reg_rtx (half_mode);
28392 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28394 ix86_expand_vector_init_interleave (half_mode, op1,
28395 &ops [n >> 1], n >> 2);
28396 emit_insn (gen_rtx_SET (VOIDmode, target,
28397 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28401 if (!TARGET_SSE4_1)
28409 /* Don't use ix86_expand_vector_init_interleave if we can't
28410 move from GPR to SSE register directly. */
28411 if (!TARGET_INTER_UNIT_MOVES)
28414 n = GET_MODE_NUNITS (mode);
28415 for (i = 0; i < n; i++)
28416 ops[i] = XVECEXP (vals, 0, i);
28417 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28425 gcc_unreachable ();
28429 int i, j, n_elts, n_words, n_elt_per_word;
28430 enum machine_mode inner_mode;
28431 rtx words[4], shift;
28433 inner_mode = GET_MODE_INNER (mode);
28434 n_elts = GET_MODE_NUNITS (mode);
28435 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28436 n_elt_per_word = n_elts / n_words;
28437 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28439 for (i = 0; i < n_words; ++i)
28441 rtx word = NULL_RTX;
28443 for (j = 0; j < n_elt_per_word; ++j)
28445 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28446 elt = convert_modes (word_mode, inner_mode, elt, true);
28452 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28453 word, 1, OPTAB_LIB_WIDEN);
28454 word = expand_simple_binop (word_mode, IOR, word, elt,
28455 word, 1, OPTAB_LIB_WIDEN);
28463 emit_move_insn (target, gen_lowpart (mode, words[0]));
28464 else if (n_words == 2)
28466 rtx tmp = gen_reg_rtx (mode);
28467 emit_clobber (tmp);
28468 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28469 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28470 emit_move_insn (target, tmp);
28472 else if (n_words == 4)
28474 rtx tmp = gen_reg_rtx (V4SImode);
28475 gcc_assert (word_mode == SImode);
28476 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28477 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28478 emit_move_insn (target, gen_lowpart (mode, tmp));
28481 gcc_unreachable ();
28485 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28486 instructions unless MMX_OK is true. */
28489 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28491 enum machine_mode mode = GET_MODE (target);
28492 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28493 int n_elts = GET_MODE_NUNITS (mode);
28494 int n_var = 0, one_var = -1;
28495 bool all_same = true, all_const_zero = true;
28499 for (i = 0; i < n_elts; ++i)
28501 x = XVECEXP (vals, 0, i);
28502 if (!(CONST_INT_P (x)
28503 || GET_CODE (x) == CONST_DOUBLE
28504 || GET_CODE (x) == CONST_FIXED))
28505 n_var++, one_var = i;
28506 else if (x != CONST0_RTX (inner_mode))
28507 all_const_zero = false;
28508 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28512 /* Constants are best loaded from the constant pool. */
28515 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28519 /* If all values are identical, broadcast the value. */
28521 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28522 XVECEXP (vals, 0, 0)))
28525 /* Values where only one field is non-constant are best loaded from
28526 the pool and overwritten via move later. */
28530 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28531 XVECEXP (vals, 0, one_var),
28535 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28539 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28543 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28545 enum machine_mode mode = GET_MODE (target);
28546 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28547 enum machine_mode half_mode;
28548 bool use_vec_merge = false;
28550 static rtx (*gen_extract[6][2]) (rtx, rtx)
28552 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28553 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28554 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28555 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28556 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28557 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28559 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28561 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28562 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28563 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28564 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28565 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28566 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28576 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28577 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28579 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28581 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28582 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28588 use_vec_merge = TARGET_SSE4_1;
28596 /* For the two element vectors, we implement a VEC_CONCAT with
28597 the extraction of the other element. */
28599 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28600 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28603 op0 = val, op1 = tmp;
28605 op0 = tmp, op1 = val;
28607 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28608 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28613 use_vec_merge = TARGET_SSE4_1;
28620 use_vec_merge = true;
28624 /* tmp = target = A B C D */
28625 tmp = copy_to_reg (target);
28626 /* target = A A B B */
28627 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28628 /* target = X A B B */
28629 ix86_expand_vector_set (false, target, val, 0);
28630 /* target = A X C D */
28631 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28632 const1_rtx, const0_rtx,
28633 GEN_INT (2+4), GEN_INT (3+4)));
28637 /* tmp = target = A B C D */
28638 tmp = copy_to_reg (target);
28639 /* tmp = X B C D */
28640 ix86_expand_vector_set (false, tmp, val, 0);
28641 /* target = A B X D */
28642 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28643 const0_rtx, const1_rtx,
28644 GEN_INT (0+4), GEN_INT (3+4)));
28648 /* tmp = target = A B C D */
28649 tmp = copy_to_reg (target);
28650 /* tmp = X B C D */
28651 ix86_expand_vector_set (false, tmp, val, 0);
28652 /* target = A B C X */
28653 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28654 const0_rtx, const1_rtx,
28655 GEN_INT (2+4), GEN_INT (0+4)));
28659 gcc_unreachable ();
28664 use_vec_merge = TARGET_SSE4_1;
28668 /* Element 0 handled by vec_merge below. */
28671 use_vec_merge = true;
28677 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28678 store into element 0, then shuffle them back. */
28682 order[0] = GEN_INT (elt);
28683 order[1] = const1_rtx;
28684 order[2] = const2_rtx;
28685 order[3] = GEN_INT (3);
28686 order[elt] = const0_rtx;
28688 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28689 order[1], order[2], order[3]));
28691 ix86_expand_vector_set (false, target, val, 0);
28693 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28694 order[1], order[2], order[3]));
28698 /* For SSE1, we have to reuse the V4SF code. */
28699 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28700 gen_lowpart (SFmode, val), elt);
28705 use_vec_merge = TARGET_SSE2;
28708 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28712 use_vec_merge = TARGET_SSE4_1;
28719 half_mode = V16QImode;
28725 half_mode = V8HImode;
28731 half_mode = V4SImode;
28737 half_mode = V2DImode;
28743 half_mode = V4SFmode;
28749 half_mode = V2DFmode;
28755 /* Compute offset. */
28759 gcc_assert (i <= 1);
28761 /* Extract the half. */
28762 tmp = gen_reg_rtx (half_mode);
28763 emit_insn (gen_extract[j][i] (tmp, target));
28765 /* Put val in tmp at elt. */
28766 ix86_expand_vector_set (false, tmp, val, elt);
28769 emit_insn (gen_insert[j][i] (target, target, tmp));
28778 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28779 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28780 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28784 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28786 emit_move_insn (mem, target);
28788 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28789 emit_move_insn (tmp, val);
28791 emit_move_insn (target, mem);
28796 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28798 enum machine_mode mode = GET_MODE (vec);
28799 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28800 bool use_vec_extr = false;
28813 use_vec_extr = true;
28817 use_vec_extr = TARGET_SSE4_1;
28829 tmp = gen_reg_rtx (mode);
28830 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28831 GEN_INT (elt), GEN_INT (elt),
28832 GEN_INT (elt+4), GEN_INT (elt+4)));
28836 tmp = gen_reg_rtx (mode);
28837 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
28841 gcc_unreachable ();
28844 use_vec_extr = true;
28849 use_vec_extr = TARGET_SSE4_1;
28863 tmp = gen_reg_rtx (mode);
28864 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28865 GEN_INT (elt), GEN_INT (elt),
28866 GEN_INT (elt), GEN_INT (elt)));
28870 tmp = gen_reg_rtx (mode);
28871 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28875 gcc_unreachable ();
28878 use_vec_extr = true;
28883 /* For SSE1, we have to reuse the V4SF code. */
28884 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28885 gen_lowpart (V4SFmode, vec), elt);
28891 use_vec_extr = TARGET_SSE2;
28894 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28898 use_vec_extr = TARGET_SSE4_1;
28902 /* ??? Could extract the appropriate HImode element and shift. */
28909 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28910 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28912 /* Let the rtl optimizers know about the zero extension performed. */
28913 if (inner_mode == QImode || inner_mode == HImode)
28915 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28916 target = gen_lowpart (SImode, target);
28919 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28923 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28925 emit_move_insn (mem, vec);
28927 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28928 emit_move_insn (target, tmp);
28932 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28933 pattern to reduce; DEST is the destination; IN is the input vector. */
28936 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28938 rtx tmp1, tmp2, tmp3;
28940 tmp1 = gen_reg_rtx (V4SFmode);
28941 tmp2 = gen_reg_rtx (V4SFmode);
28942 tmp3 = gen_reg_rtx (V4SFmode);
28944 emit_insn (gen_sse_movhlps (tmp1, in, in));
28945 emit_insn (fn (tmp2, tmp1, in));
28947 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28948 const1_rtx, const1_rtx,
28949 GEN_INT (1+4), GEN_INT (1+4)));
28950 emit_insn (fn (dest, tmp2, tmp3));
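/* Hedged illustration (not part of this file's logic): the reduction
   above in scalar form, e.g. for FN = max.  movhlps folds the high pair
   onto the low pair; the shufps then folds element 1 onto element 0.  */
#if 0
static float
reduce_v4sf_sketch (const float v[4], float (*fn) (float, float))
{
  float lo = fn (v[0], v[2]);	/* fn (movhlps (in, in), in), element 0.  */
  float hi = fn (v[1], v[3]);	/* Same, element 1.  */
  return fn (lo, hi);		/* Final fold via the shufps broadcast.  */
}
#endif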
28953 /* Target hook for scalar_mode_supported_p. */
28955 ix86_scalar_mode_supported_p (enum machine_mode mode)
28957 if (DECIMAL_FLOAT_MODE_P (mode))
28958 return default_decimal_float_supported_p ();
28959 else if (mode == TFmode)
28962 return default_scalar_mode_supported_p (mode);
28965 /* Implements target hook vector_mode_supported_p. */
28967 ix86_vector_mode_supported_p (enum machine_mode mode)
28969 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28971 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28973 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28975 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28977 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28982 /* Target hook for c_mode_for_suffix. */
28983 static enum machine_mode
28984 ix86_c_mode_for_suffix (char suffix)
28994 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28996 We do this in the new i386 backend to maintain source compatibility
28997 with the old cc0-based compiler. */
29000 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29001 tree inputs ATTRIBUTE_UNUSED,
29004 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29006 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29011 /* Implements the target hook targetm.asm.encode_section_info. This
29012 is not used by NetWare. */
29014 static void ATTRIBUTE_UNUSED
29015 ix86_encode_section_info (tree decl, rtx rtl, int first)
29017 default_encode_section_info (decl, rtl, first);
29019 if (TREE_CODE (decl) == VAR_DECL
29020 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29021 && ix86_in_large_data_p (decl))
29022 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29025 /* Worker function for REVERSE_CONDITION. */
29028 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29030 return (mode != CCFPmode && mode != CCFPUmode
29031 ? reverse_condition (code)
29032 : reverse_condition_maybe_unordered (code));
29035 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29039 output_387_reg_move (rtx insn, rtx *operands)
29041 if (REG_P (operands[0]))
29043 if (REG_P (operands[1])
29044 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29046 if (REGNO (operands[0]) == FIRST_STACK_REG)
29047 return output_387_ffreep (operands, 0);
29048 return "fstp\t%y0";
29050 if (STACK_TOP_P (operands[0]))
29051 return "fld%Z1\t%y1";
29054 else if (MEM_P (operands[0]))
29056 gcc_assert (REG_P (operands[1]));
29057 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29058 return "fstp%Z0\t%y0";
29061 /* There is no non-popping store to memory for XFmode.
29062 So if we need one, follow the store with a load. */
29063 if (GET_MODE (operands[0]) == XFmode)
29064 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29066 return "fst%Z0\t%y0";
29073 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29074 FP status register is set. */
29077 ix86_emit_fp_unordered_jump (rtx label)
29079 rtx reg = gen_reg_rtx (HImode);
29082 emit_insn (gen_x86_fnstsw_1 (reg));
29084 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29086 emit_insn (gen_x86_sahf_1 (reg));
29088 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29089 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29093 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29095 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29096 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29099 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29100 gen_rtx_LABEL_REF (VOIDmode, label),
29102 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29104 emit_jump_insn (temp);
29105 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29108 /* Output code to perform a log1p XFmode calculation. */
29110 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29112 rtx label1 = gen_label_rtx ();
29113 rtx label2 = gen_label_rtx ();
29115 rtx tmp = gen_reg_rtx (XFmode);
29116 rtx tmp2 = gen_reg_rtx (XFmode);
29119 emit_insn (gen_absxf2 (tmp, op1));
29120 test = gen_rtx_GE (VOIDmode, tmp,
29121 CONST_DOUBLE_FROM_REAL_VALUE (
29122 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29124 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29126 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29127 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29128 emit_jump (label2);
29130 emit_label (label1);
29131 emit_move_insn (tmp, CONST1_RTX (XFmode));
29132 emit_insn (gen_addxf3 (tmp, op1, tmp));
29133 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29134 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29136 emit_label (label2);
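/* Hedged illustration (not part of this file's logic): the branch
   structure above in C.  fyl2xp1 is only specified for |x| below
   1 - sqrt(2)/2 ~ 0.2929, where computing log2(1 + x) without explicitly
   forming 1 + x preserves precision for tiny x.  */
#if 0
#include <math.h>

static long double
log1p_sketch (long double x)
{
  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    return log1pl (x);		/* fyl2xp1 path, scaled by fldln2.  */
  return logl (1.0L + x);	/* addxf3 then fyl2x path.  */
}
#endif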
29139 /* Output code to perform a Newton-Raphson approximation of a single precision
29140 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29142 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29144 rtx x0, x1, e0, e1, two;
29146 x0 = gen_reg_rtx (mode);
29147 e0 = gen_reg_rtx (mode);
29148 e1 = gen_reg_rtx (mode);
29149 x1 = gen_reg_rtx (mode);
29151 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29153 if (VECTOR_MODE_P (mode))
29154 two = ix86_build_const_vector (SFmode, true, two);
29156 two = force_reg (mode, two);
29158 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29160 /* x0 = rcp(b) estimate */
29161 emit_insn (gen_rtx_SET (VOIDmode, x0,
29162 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29165 emit_insn (gen_rtx_SET (VOIDmode, e0,
29166 gen_rtx_MULT (mode, x0, a)));
29168 emit_insn (gen_rtx_SET (VOIDmode, e1,
29169 gen_rtx_MULT (mode, x0, b)));
29171 emit_insn (gen_rtx_SET (VOIDmode, x1,
29172 gen_rtx_MINUS (mode, two, e1)));
29173 /* res = e0 * x1 */
29174 emit_insn (gen_rtx_SET (VOIDmode, res,
29175 gen_rtx_MULT (mode, e0, x1)));
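/* Hedged illustration (not part of this file's logic): the division
   sequence above in scalar form.  rcp_estimate is a hypothetical
   stand-in for the RCPSS hardware approximation (roughly 12 bits of
   precision).  */
#if 0
extern float rcp_estimate (float);	/* Hypothetical RCPSS stand-in.  */

static float
swdiv_sketch (float a, float b)
{
  float x0 = rcp_estimate (b);	/* x0 ~ 1/b.  */
  float e0 = x0 * a;		/* a * rcp(b).  */
  float e1 = x0 * b;		/* b * rcp(b), close to 1.0.  */
  float x1 = 2.0f - e1;		/* Newton-Raphson correction.  */
  return e0 * x1;		/* a * rcp(b) * (2.0 - b * rcp(b)).  */
}
#endif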
29178 /* Output code to perform a Newton-Raphson approximation of a
29179 single precision floating point [reciprocal] square root. */
29181 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29184 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29187 x0 = gen_reg_rtx (mode);
29188 e0 = gen_reg_rtx (mode);
29189 e1 = gen_reg_rtx (mode);
29190 e2 = gen_reg_rtx (mode);
29191 e3 = gen_reg_rtx (mode);
29193 real_from_integer (&r, VOIDmode, -3, -1, 0);
29194 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29196 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29197 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29199 if (VECTOR_MODE_P (mode))
29201 mthree = ix86_build_const_vector (SFmode, true, mthree);
29202 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29205 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29206 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29208 /* x0 = rsqrt(a) estimate */
29209 emit_insn (gen_rtx_SET (VOIDmode, x0,
29210 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29213 /* If a == 0.0, filter out the infinite estimate to prevent NaN for sqrt(0.0). */
29218 zero = gen_reg_rtx (mode);
29219 mask = gen_reg_rtx (mode);
29221 zero = force_reg (mode, CONST0_RTX(mode));
29222 emit_insn (gen_rtx_SET (VOIDmode, mask,
29223 gen_rtx_NE (mode, zero, a)));
29225 emit_insn (gen_rtx_SET (VOIDmode, x0,
29226 gen_rtx_AND (mode, x0, mask)));
29230 emit_insn (gen_rtx_SET (VOIDmode, e0,
29231 gen_rtx_MULT (mode, x0, a)));
29233 emit_insn (gen_rtx_SET (VOIDmode, e1,
29234 gen_rtx_MULT (mode, e0, x0)));
29237 mthree = force_reg (mode, mthree);
29238 emit_insn (gen_rtx_SET (VOIDmode, e2,
29239 gen_rtx_PLUS (mode, e1, mthree)));
29241 mhalf = force_reg (mode, mhalf);
29243 /* e3 = -.5 * x0 */
29244 emit_insn (gen_rtx_SET (VOIDmode, e3,
29245 gen_rtx_MULT (mode, x0, mhalf)));
29247 /* e3 = -.5 * e0 */
29248 emit_insn (gen_rtx_SET (VOIDmode, e3,
29249 gen_rtx_MULT (mode, e0, mhalf)));
29250 /* ret = e2 * e3 */
29251 emit_insn (gen_rtx_SET (VOIDmode, res,
29252 gen_rtx_MULT (mode, e2, e3)));
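/* Hedged illustration (not part of this file's logic): the sqrt/rsqrt
   sequence above in scalar form.  rsqrt_estimate is a hypothetical
   stand-in for RSQRTSS; RECIP selects the reciprocal variant.  */
#if 0
extern float rsqrt_estimate (float);	/* Hypothetical RSQRTSS stand-in.  */

static float
swsqrt_sketch (float a, int recip)
{
  float x0 = rsqrt_estimate (a);	/* x0 ~ 1/sqrt(a).  */
  float e0 = x0 * a;
  float e1 = e0 * x0;			/* a * x0 * x0, close to 1.0.  */
  float e2 = e1 - 3.0f;			/* e1 + mthree.  */
  float e3 = (recip ? x0 : e0) * -0.5f;	/* -.5 * x0 or -.5 * e0.  */
  return e2 * e3;
}
#endif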
29255 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29257 static void ATTRIBUTE_UNUSED
29258 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29261 /* With Binutils 2.15, the "@unwind" marker must be specified on
29262 every occurrence of the ".eh_frame" section, not just the first one. */
29265 && strcmp (name, ".eh_frame") == 0)
29267 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29268 flags & SECTION_WRITE ? "aw" : "a");
29271 default_elf_asm_named_section (name, flags, decl);
29274 /* Return the mangling of TYPE if it is an extended fundamental type. */
29276 static const char *
29277 ix86_mangle_type (const_tree type)
29279 type = TYPE_MAIN_VARIANT (type);
29281 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29282 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29285 switch (TYPE_MODE (type))
29288 /* __float128 is "g". */
29291 /* "long double" or __float80 is "e". */
29298 /* For 32-bit code we can save PIC register setup by using
29299 __stack_chk_fail_local hidden function instead of calling
29300 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29301 register, so it is better to call __stack_chk_fail directly. */
29304 ix86_stack_protect_fail (void)
29306 return TARGET_64BIT
29307 ? default_external_stack_protect_fail ()
29308 : default_hidden_stack_protect_fail ();
29311 /* Select a format to encode pointers in exception handling data. CODE
29312 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29313 true if the symbol may be affected by dynamic relocations.
29315 ??? All x86 object file formats are capable of representing this.
29316 After all, the relocation needed is the same as for the call insn.
29317 Whether or not a particular assembler allows us to enter such, I
29318 guess we'll have to see. */
29320 asm_preferred_eh_data_format (int code, int global)
29324 int type = DW_EH_PE_sdata8;
29326 || ix86_cmodel == CM_SMALL_PIC
29327 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29328 type = DW_EH_PE_sdata4;
29329 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29331 if (ix86_cmodel == CM_SMALL
29332 || (ix86_cmodel == CM_MEDIUM && code))
29333 return DW_EH_PE_udata4;
29334 return DW_EH_PE_absptr;
29337 /* Expand copysign from SIGN to the positive value ABS_VALUE
29338 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
29341 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29343 enum machine_mode mode = GET_MODE (sign);
29344 rtx sgn = gen_reg_rtx (mode);
29345 if (mask == NULL_RTX)
29347 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29348 if (!VECTOR_MODE_P (mode))
29350 /* We need to generate a scalar mode mask in this case. */
29351 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29352 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29353 mask = gen_reg_rtx (mode);
29354 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29358 mask = gen_rtx_NOT (mode, mask);
29359 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29360 gen_rtx_AND (mode, mask, sign)));
29361 emit_insn (gen_rtx_SET (VOIDmode, result,
29362 gen_rtx_IOR (mode, abs_value, sgn)));
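/* Hedged illustration (not part of this file's logic): the bitwise
   copysign above on a scalar double viewed as bits.  The result is
   abs_value | (sign & sign_bit_mask).  */
#if 0
#include <stdint.h>

static uint64_t
copysign_bits_sketch (uint64_t abs_value, uint64_t sign)
{
  const uint64_t sign_bit = UINT64_C (1) << 63;
  return abs_value | (sign & sign_bit);
}
#endif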
29365 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29366 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
29369 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29371 enum machine_mode mode = GET_MODE (op0);
29374 xa = gen_reg_rtx (mode);
29375 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29376 if (!VECTOR_MODE_P (mode))
29378 /* We need to generate a scalar mode mask in this case. */
29379 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29380 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29381 mask = gen_reg_rtx (mode);
29382 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29384 emit_insn (gen_rtx_SET (VOIDmode, xa,
29385 gen_rtx_AND (mode, op0, mask)));
29393 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29394 swapping the operands if SWAP_OPERANDS is true. The expanded
29395 code is a forward jump to a newly created label in case the
29396 comparison is true. The generated label rtx is returned. */
29398 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29399 bool swap_operands)
29410 label = gen_label_rtx ();
29411 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29412 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29413 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29414 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29415 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29416 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29417 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29418 JUMP_LABEL (tmp) = label;
29423 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29424 using comparison code CODE. Operands are swapped for the comparison if
29425 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29427 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29428 bool swap_operands)
29430 enum machine_mode mode = GET_MODE (op0);
29431 rtx mask = gen_reg_rtx (mode);
29440 if (mode == DFmode)
29441 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29442 gen_rtx_fmt_ee (code, mode, op0, op1)));
29444 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29445 gen_rtx_fmt_ee (code, mode, op0, op1)));
29450 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29451 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29453 ix86_gen_TWO52 (enum machine_mode mode)
29455 REAL_VALUE_TYPE TWO52r;
29458 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29459 TWO52 = const_double_from_real_value (TWO52r, mode);
29460 TWO52 = force_reg (mode, TWO52);
29465 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
29468 ix86_expand_lround (rtx op0, rtx op1)
29470 /* C code for the stuff we're doing below:
29471 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29474 enum machine_mode mode = GET_MODE (op1);
29475 const struct real_format *fmt;
29476 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29479 /* load nextafter (0.5, 0.0) */
29480 fmt = REAL_MODE_FORMAT (mode);
29481 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29482 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29484 /* adj = copysign (0.5, op1) */
29485 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29486 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29488 /* adj = op1 + adj */
29489 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29491 /* op0 = (imode)adj */
29492 expand_fix (op0, adj, 0);
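/* Hedged illustration (not part of this file's logic): the lround
   expansion above in C.  Biasing by nextafter (0.5, 0.0) instead of 0.5
   keeps the largest double below .5 from rounding up when the addition
   itself rounds.  */
#if 0
#include <math.h>

static long
lround_sketch (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);	/* Truncating fix conversion.  */
}
#endif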
29495 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
29498 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29500 /* C code for the stuff we're doing below (for do_floor):
29502 xi -= (double)xi > op1 ? 1 : 0;
29505 enum machine_mode fmode = GET_MODE (op1);
29506 enum machine_mode imode = GET_MODE (op0);
29507 rtx ireg, freg, label, tmp;
29509 /* reg = (long)op1 */
29510 ireg = gen_reg_rtx (imode);
29511 expand_fix (ireg, op1, 0);
29513 /* freg = (double)reg */
29514 freg = gen_reg_rtx (fmode);
29515 expand_float (freg, ireg, 0);
29517 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29518 label = ix86_expand_sse_compare_and_jump (UNLE,
29519 freg, op1, !do_floor);
29520 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29521 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29522 emit_move_insn (ireg, tmp);
29524 emit_label (label);
29525 LABEL_NUSES (label) = 1;
29527 emit_move_insn (op0, ireg);
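/* Hedged illustration (not part of this file's logic): the compensation
   above in C, for the floor case.  Truncation rounds toward zero, so a
   negative non-integer overshoots and needs a -1 fixup.  */
#if 0
static long
lfloor_sketch (double x)
{
  long i = (long) x;		/* Truncating fix conversion.  */
  if ((double) i > x)		/* xi -= (double) xi > op1 ? 1 : 0.  */
    i -= 1;
  return i;
}
#endif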
29530 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29531 result in OPERAND0. */
29533 ix86_expand_rint (rtx operand0, rtx operand1)
29535 /* C code for the stuff we're doing below:
29536 xa = fabs (operand1);
29537 if (!isless (xa, 2**52))
29539 xa = xa + 2**52 - 2**52;
29540 return copysign (xa, operand1);
29542 enum machine_mode mode = GET_MODE (operand0);
29543 rtx res, xa, label, TWO52, mask;
29545 res = gen_reg_rtx (mode);
29546 emit_move_insn (res, operand1);
29548 /* xa = abs (operand1) */
29549 xa = ix86_expand_sse_fabs (res, &mask);
29551 /* if (!isless (xa, TWO52)) goto label; */
29552 TWO52 = ix86_gen_TWO52 (mode);
29553 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29555 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29556 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29558 ix86_sse_copysign_to_positive (res, xa, res, mask);
29560 emit_label (label);
29561 LABEL_NUSES (label) = 1;
29563 emit_move_insn (operand0, res);
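/* Hedged illustration (not part of this file's logic): why the
   xa + 2**52 - 2**52 trick above rounds to an integer.  For |x| < 2**52
   the sum has no fraction bits left, so the FPU's round-to-nearest-even
   discards the fraction, and the subtraction recovers the rounded value.
   (A real compiler must keep these operations from being folded away,
   which the explicit RTL form above guarantees.)  */
#if 0
#include <math.h>

static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52.  */
  double xa = fabs (x);
  if (!(xa < two52))		/* Large values and NaN pass through.  */
    return x;
  xa = (xa + two52) - two52;
  return copysign (xa, x);	/* Restore the sign, keeping -0.0.  */
}
#endif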
29566 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
29569 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29571 /* C code for the stuff we expand below.
29572 double xa = fabs (x), x2;
29573 if (!isless (xa, TWO52))
29575 xa = xa + TWO52 - TWO52;
29576 x2 = copysign (xa, x);
29585 enum machine_mode mode = GET_MODE (operand0);
29586 rtx xa, TWO52, tmp, label, one, res, mask;
29588 TWO52 = ix86_gen_TWO52 (mode);
29590 /* Temporary for holding the result, initialized to the input
29591 operand to ease control flow. */
29592 res = gen_reg_rtx (mode);
29593 emit_move_insn (res, operand1);
29595 /* xa = abs (operand1) */
29596 xa = ix86_expand_sse_fabs (res, &mask);
29598 /* if (!isless (xa, TWO52)) goto label; */
29599 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29601 /* xa = xa + TWO52 - TWO52; */
29602 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29603 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29605 /* xa = copysign (xa, operand1) */
29606 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29608 /* generate 1.0 or -1.0 */
29609 one = force_reg (mode,
29610 const_double_from_real_value (do_floor
29611 ? dconst1 : dconstm1, mode));
29613 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29614 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29615 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29616 gen_rtx_AND (mode, one, tmp)));
29617 /* We always need to subtract here to preserve signed zero. */
29618 tmp = expand_simple_binop (mode, MINUS,
29619 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29620 emit_move_insn (res, tmp);
29622 emit_label (label);
29623 LABEL_NUSES (label) = 1;
29625 emit_move_insn (operand0, res);
29628 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
29631 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29633 /* C code for the stuff we expand below.
29634 double xa = fabs (x), x2;
29635 if (!isless (xa, TWO52))
29637 x2 = (double)(long)x;
29644 if (HONOR_SIGNED_ZEROS (mode))
29645 return copysign (x2, x);
29648 enum machine_mode mode = GET_MODE (operand0);
29649 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29651 TWO52 = ix86_gen_TWO52 (mode);
29653 /* Temporary for holding the result, initialized to the input
29654 operand to ease control flow. */
29655 res = gen_reg_rtx (mode);
29656 emit_move_insn (res, operand1);
29658 /* xa = abs (operand1) */
29659 xa = ix86_expand_sse_fabs (res, &mask);
29661 /* if (!isless (xa, TWO52)) goto label; */
29662 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29664 /* xa = (double)(long)x */
29665 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29666 expand_fix (xi, res, 0);
29667 expand_float (xa, xi, 0);
29670 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29672 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29673 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29674 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29675 gen_rtx_AND (mode, one, tmp)));
29676 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29677 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29678 emit_move_insn (res, tmp);
29680 if (HONOR_SIGNED_ZEROS (mode))
29681 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29683 emit_label (label);
29684 LABEL_NUSES (label) = 1;
29686 emit_move_insn (operand0, res);
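/* Hedged illustration (not part of this file's logic): the floor/ceil
   expansion above in C.  DO_FLOOR selects the direction of the
   compensation applied to the truncated value.  */
#if 0
#include <math.h>

static double
floorceil_sketch (double x, int do_floor)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x);
  if (!(xa < two52))
    return x;				/* Already integral (or NaN).  */
  double x2 = (double) (long) x;	/* Truncate toward zero.  */
  if (do_floor ? x2 > x : x2 < x)
    x2 += do_floor ? -1.0 : 1.0;	/* Compensate the truncation.  */
  return copysign (x2, x);		/* Only needed to honor -0.0.  */
}
#endif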
29689 /* Expand SSE sequence for computing round from OPERAND1 storing
29690 into OPERAND0. Sequence that works without relying on DImode truncation
29691 via cvttsd2siq, which is only available on 64-bit targets. */
29693 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29695 /* C code for the stuff we expand below.
29696 double xa = fabs (x), xa2, x2;
29697 if (!isless (xa, TWO52))
29699 Using the absolute value and copying back sign makes
29700 -0.0 -> -0.0 correct.
29701 xa2 = xa + TWO52 - TWO52;
29706 else if (dxa > 0.5)
29708 x2 = copysign (xa2, x);
29711 enum machine_mode mode = GET_MODE (operand0);
29712 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29714 TWO52 = ix86_gen_TWO52 (mode);
29716 /* Temporary for holding the result, initialized to the input
29717 operand to ease control flow. */
29718 res = gen_reg_rtx (mode);
29719 emit_move_insn (res, operand1);
29721 /* xa = abs (operand1) */
29722 xa = ix86_expand_sse_fabs (res, &mask);
29724 /* if (!isless (xa, TWO52)) goto label; */
29725 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29727 /* xa2 = xa + TWO52 - TWO52; */
29728 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29729 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29731 /* dxa = xa2 - xa; */
29732 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29734 /* generate 0.5, 1.0 and -0.5 */
29735 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29736 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29737 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29741 tmp = gen_reg_rtx (mode);
29742 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29743 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29744 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29745 gen_rtx_AND (mode, one, tmp)));
29746 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29747 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29748 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29749 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29750 gen_rtx_AND (mode, one, tmp)));
29751 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29753 /* res = copysign (xa2, operand1) */
29754 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29756 emit_label (label);
29757 LABEL_NUSES (label) = 1;
29759 emit_move_insn (operand0, res);
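/* Hedged illustration (not part of this file's logic): the 32-bit-safe
   round expansion above in C.  xa2 is rint (xa); dxa records which way
   that rounding went, and the two adjustments convert round-to-even into
   round-half-away-from-zero.  */
#if 0
#include <math.h>

static double
round_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x);
  if (!(xa < two52))
    return x;
  double xa2 = (xa + two52) - two52;	/* rint (xa).  */
  double dxa = xa2 - xa;		/* Rounding error, in [-0.5, 0.5].  */
  if (dxa > 0.5)
    xa2 -= 1.0;				/* Rounded up too far.  */
  else if (dxa <= -0.5)
    xa2 += 1.0;				/* Round halfway cases away from 0.  */
  return copysign (xa2, x);
}
#endif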
29762 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
29765 ix86_expand_trunc (rtx operand0, rtx operand1)
29767 /* C code for SSE variant we expand below.
29768 double xa = fabs (x), x2;
29769 if (!isless (xa, TWO52))
29771 x2 = (double)(long)x;
29772 if (HONOR_SIGNED_ZEROS (mode))
29773 return copysign (x2, x);
29776 enum machine_mode mode = GET_MODE (operand0);
29777 rtx xa, xi, TWO52, label, res, mask;
29779 TWO52 = ix86_gen_TWO52 (mode);
29781 /* Temporary for holding the result, initialized to the input
29782 operand to ease control flow. */
29783 res = gen_reg_rtx (mode);
29784 emit_move_insn (res, operand1);
29786 /* xa = abs (operand1) */
29787 xa = ix86_expand_sse_fabs (res, &mask);
29789 /* if (!isless (xa, TWO52)) goto label; */
29790 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29792 /* x = (double)(long)x */
29793 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29794 expand_fix (xi, res, 0);
29795 expand_float (res, xi, 0);
29797 if (HONOR_SIGNED_ZEROS (mode))
29798 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29800 emit_label (label);
29801 LABEL_NUSES (label) = 1;
29803 emit_move_insn (operand0, res);
29806 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.
Sequence that works without relying on DImode truncation via cvttsd2siq,
which is only available on 64-bit targets. */
29809 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29811 enum machine_mode mode = GET_MODE (operand0);
29812 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29814 /* C code for SSE variant we expand below.
29815 double xa = fabs (x), x2;
29816 if (!isless (xa, TWO52))
29818 xa2 = xa + TWO52 - TWO52;
29822 x2 = copysign (xa2, x);
29826 TWO52 = ix86_gen_TWO52 (mode);
29828 /* Temporary for holding the result, initialized to the input
29829 operand to ease control flow. */
29830 res = gen_reg_rtx (mode);
29831 emit_move_insn (res, operand1);
29833 /* xa = abs (operand1) */
29834 xa = ix86_expand_sse_fabs (res, &smask);
29836 /* if (!isless (xa, TWO52)) goto label; */
29837 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29839 /* res = xa + TWO52 - TWO52; */
29840 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29841 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29842 emit_move_insn (res, tmp);
29845 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29847 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29848 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29849 emit_insn (gen_rtx_SET (VOIDmode, mask,
29850 gen_rtx_AND (mode, mask, one)));
29851 tmp = expand_simple_binop (mode, MINUS,
29852 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29853 emit_move_insn (res, tmp);
29855 /* res = copysign (res, operand1) */
29856 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29858 emit_label (label);
29859 LABEL_NUSES (label) = 1;
29861 emit_move_insn (operand0, res);
29864 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
29867 ix86_expand_round (rtx operand0, rtx operand1)
29869 /* C code for the stuff we're doing below:
29870 double xa = fabs (x);
29871 if (!isless (xa, TWO52))
29873 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29874 return copysign (xa, x);
29876 enum machine_mode mode = GET_MODE (operand0);
29877 rtx res, TWO52, xa, label, xi, half, mask;
29878 const struct real_format *fmt;
29879 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29881 /* Temporary for holding the result, initialized to the input
29882 operand to ease control flow. */
29883 res = gen_reg_rtx (mode);
29884 emit_move_insn (res, operand1);
29886 TWO52 = ix86_gen_TWO52 (mode);
29887 xa = ix86_expand_sse_fabs (res, &mask);
29888 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29890 /* load nextafter (0.5, 0.0) */
29891 fmt = REAL_MODE_FORMAT (mode);
29892 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29893 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29895 /* xa = xa + 0.5 */
29896 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29897 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29899 /* xa = (double)(int64_t)xa */
29900 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29901 expand_fix (xi, xa, 0);
29902 expand_float (xa, xi, 0);
29904 /* res = copysign (xa, operand1) */
29905 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29907 emit_label (label);
29908 LABEL_NUSES (label) = 1;
29910 emit_move_insn (operand0, res);
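/* Hedged illustration (not part of this file's logic): the round
   expansion above in C.  Working on the absolute value lets a simple
   0.5 bias plus truncation implement round-half-away-from-zero, and
   pred_half = nextafter (0.5, 0.0) avoids the case where adding plain
   0.5 to a value just below .5 rounds the sum up to 1.0.  */
#if 0
#include <math.h>

static double
round_sketch (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = fabs (x);
  if (!(xa < two52))
    return x;
  xa = (double) (long) (xa + nextafter (0.5, 0.0));
  return copysign (xa, x);
}
#endif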
29914 /* Table of valid machine attributes. */
29915 static const struct attribute_spec ix86_attribute_table[] =
29917 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29918 /* Stdcall attribute says callee is responsible for popping arguments
29919 if they are not variable. */
29920 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29921 /* Fastcall attribute says callee is responsible for popping arguments
29922 if they are not variable. */
29923 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29924 /* Thiscall attribute says callee is responsible for popping arguments
29925 if they are not variable. */
29926 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29927 /* Cdecl attribute says the callee is a normal C declaration */
29928 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29929 /* Regparm attribute specifies how many integer arguments are to be
29930 passed in registers. */
29931 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29932 /* Sseregparm attribute says we are using x86_64 calling conventions
29933 for FP arguments. */
29934 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29935 /* force_align_arg_pointer says this function realigns the stack at entry. */
29936 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29937 false, true, true, ix86_handle_cconv_attribute },
29938 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29939 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29940 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29941 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29943 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29944 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29945 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29946 SUBTARGET_ATTRIBUTE_TABLE,
29948 /* ms_abi and sysv_abi calling convention function attributes. */
29949 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29950 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29951 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29953 { NULL, 0, 0, false, false, false, NULL }
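/* Illustrative usage of these attributes in user code (a sketch, not
   exhaustive; see the GCC manual for the full semantics):

     int  __attribute__((fastcall))    f (int a, int b);
     int  __attribute__((regparm (3))) g (int a, int b, int c);
     void __attribute__((ms_abi))      h (int a, ...);

   ix86_handle_cconv_attribute diagnoses conflicting combinations such
   as stdcall together with fastcall.  */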
29956 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29958 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
29959 tree vectype ATTRIBUTE_UNUSED,
29960 int misalign ATTRIBUTE_UNUSED)
29962 switch (type_of_cost)
29963 {
29964 case scalar_stmt:
29965 return ix86_cost->scalar_stmt_cost;
29967 case scalar_load:
29968 return ix86_cost->scalar_load_cost;
29970 case scalar_store:
29971 return ix86_cost->scalar_store_cost;
29973 case vector_stmt:
29974 return ix86_cost->vec_stmt_cost;
29976 case vector_load:
29977 return ix86_cost->vec_align_load_cost;
29979 case vector_store:
29980 return ix86_cost->vec_store_cost;
29982 case vec_to_scalar:
29983 return ix86_cost->vec_to_scalar_cost;
29985 case scalar_to_vec:
29986 return ix86_cost->scalar_to_vec_cost;
29988 case unaligned_load:
29989 case unaligned_store:
29990 return ix86_cost->vec_unalign_load_cost;
29992 case cond_branch_taken:
29993 return ix86_cost->cond_taken_branch_cost;
29995 case cond_branch_not_taken:
29996 return ix86_cost->cond_not_taken_branch_cost;
30001 default:
30002 gcc_unreachable ();
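/* These numbers feed the vectorizer's profitability model; very roughly,
   a loop is vectorized only when something like

     VF * scalar_stmt_cost  >  vec_stmt_cost + per-loop overhead

   holds.  This is a simplification -- the precise accounting lives in
   tree-vect-loop.c, not here.  */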
30007 /* Implement targetm.vectorize.builtin_vec_perm. */
30010 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30012 tree itype = TREE_TYPE (vec_type);
30013 bool u = TYPE_UNSIGNED (itype);
30014 enum machine_mode vmode = TYPE_MODE (vec_type);
30015 enum ix86_builtins fcode;
30016 bool ok = TARGET_SSE2;
30022 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30025 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30027 itype = ix86_get_builtin_type (IX86_BT_DI);
30032 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30036 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30038 itype = ix86_get_builtin_type (IX86_BT_SI);
30042 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30045 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30048 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30051 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30061 *mask_type = itype;
30062 return ix86_builtins[(int) fcode];
30065 /* Return a vector mode with twice as many elements as VMODE. */
30066 /* ??? Consider moving this to a table generated by genmodes.c. */
30068 static enum machine_mode
30069 doublesize_vector_mode (enum machine_mode vmode)
30070 {
30071 switch (vmode)
30072 {
30073 case V2SFmode: return V4SFmode;
30074 case V1DImode: return V2DImode;
30075 case V2SImode: return V4SImode;
30076 case V4HImode: return V8HImode;
30077 case V8QImode: return V16QImode;
30079 case V2DFmode: return V4DFmode;
30080 case V4SFmode: return V8SFmode;
30081 case V2DImode: return V4DImode;
30082 case V4SImode: return V8SImode;
30083 case V8HImode: return V16HImode;
30084 case V16QImode: return V32QImode;
30086 case V4DFmode: return V8DFmode;
30087 case V8SFmode: return V16SFmode;
30088 case V4DImode: return V8DImode;
30089 case V8SImode: return V16SImode;
30090 case V16HImode: return V32HImode;
30091 case V32QImode: return V64QImode;
30093 default:
30094 gcc_unreachable ();
30098 /* Construct (set target (vec_select op0 (parallel perm))) and
30099 return true if that's a valid instruction in the active ISA. */
30102 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30104 rtx rperm[MAX_VECT_LEN], x;
30105 unsigned i;
30107 for (i = 0; i < nelt; ++i)
30108 rperm[i] = GEN_INT (perm[i]);
30110 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30111 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30112 x = gen_rtx_SET (VOIDmode, target, x);
30114 x = emit_insn (x);
30115 if (recog_memoized (x) < 0)
30116 {
30117 remove_insn (x);
30118 return false;
30119 }
30120 return true;
30123 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30126 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30127 const unsigned char *perm, unsigned nelt)
30129 enum machine_mode v2mode;
30130 rtx x;
30132 v2mode = doublesize_vector_mode (GET_MODE (op0));
30133 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30134 return expand_vselect (target, x, perm, nelt);
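/* E.g. for V4SImode operands with perm = { 0, 4, 1, 5 } this builds

     (set (reg:V4SI target)
          (vec_select:V4SI
            (vec_concat:V8SI (reg:V4SI op0) (reg:V4SI op1))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which recog can match as punpckldq under SSE2.  */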
30137 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30138 in terms of blendp[sd] / pblendw / pblendvb. */
30141 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30143 enum machine_mode vmode = d->vmode;
30144 unsigned i, mask, nelt = d->nelt;
30145 rtx target, op0, op1, x;
30147 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30148 return false;
30149 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30150 return false;
30152 /* This is a blend, not a permute. Elements must stay in their
30153 respective lanes. */
30154 for (i = 0; i < nelt; ++i)
30156 unsigned e = d->perm[i];
30157 if (!(e == i || e == i + nelt))
30158 return false;
30164 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30165 decision should be extracted elsewhere, so that we only try that
30166 sequence once all budget==3 options have been tried. */
30168 /* For bytes, see if bytes move in pairs so we can use pblendw with
30169 an immediate argument, rather than pblendvb with a vector argument. */
30170 if (vmode == V16QImode)
30172 bool pblendw_ok = true;
30173 for (i = 0; i < 16 && pblendw_ok; i += 2)
30174 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30178 rtx rperm[16], vperm;
30180 for (i = 0; i < nelt; ++i)
30181 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30183 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30184 vperm = force_reg (V16QImode, vperm);
30186 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30187 return true;
30191 target = d->target;
30192 op0 = d->op0;
30193 op1 = d->op1;
30194 mask = 0;
30196 switch (vmode)
30197 {
30198 case V4DFmode:
30199 case V8SFmode:
30200 case V2DFmode:
30201 case V4SFmode:
30202 case V8HImode:
30203 for (i = 0; i < nelt; ++i)
30204 mask |= (d->perm[i] >= nelt) << i;
30205 break;
30207 case V2DImode:
30208 for (i = 0; i < 2; ++i)
30209 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30210 goto do_subreg;
30212 case V4SImode:
30213 for (i = 0; i < 4; ++i)
30214 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30215 goto do_subreg;
30217 case V16QImode:
30218 for (i = 0; i < 8; ++i)
30219 mask |= (d->perm[i * 2] >= 16) << i;
30221 do_subreg:
30222 vmode = V8HImode;
30223 target = gen_lowpart (vmode, target);
30224 op0 = gen_lowpart (vmode, op0);
30225 op1 = gen_lowpart (vmode, op1);
30226 break;
30228 default:
30229 gcc_unreachable ();
30232 /* This vec_merge matches five different insn patterns, one for each of the modes handled above. */
30233 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30234 x = gen_rtx_SET (VOIDmode, target, x);
30235 emit_insn (x);
30237 return true;
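/* Worked example: V4SFmode with perm = { 0, 5, 2, 7 } takes elements 1
   and 3 from op1, so the loop above builds mask = 0xa and the vec_merge
   is matched as blendps $0xa.  */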
30240 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30241 in terms of the variable form of vpermilps.
30243 Note that we will have already failed the immediate input vpermilps,
30244 which requires that the high and low part shuffle be identical; the
30245 variable form doesn't require that. */
30248 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30250 rtx rperm[8], vperm;
30253 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30254 return false;
30256 /* We can only permute within the 128-bit lane. */
30257 for (i = 0; i < 8; ++i)
30259 unsigned e = d->perm[i];
30260 if (i < 4 ? e >= 4 : e < 4)
30261 return false;
30267 for (i = 0; i < 8; ++i)
30269 unsigned e = d->perm[i];
30271 /* Within each 128-bit lane, the elements of op0 are numbered
30272 from 0 and the elements of op1 are numbered from 4. */
30273 if (e >= 8 + 4)
30274 e -= 8;
30275 else if (e >= 4)
30276 e -= 4;
30278 rperm[i] = GEN_INT (e);
30281 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30282 vperm = force_reg (V8SImode, vperm);
30283 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30284 return true;
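/* E.g. perm = { 1, 0, 3, 2, 5, 4, 7, 6 } (swap adjacent pairs in both
   lanes) becomes the control vector { 1, 0, 3, 2, 1, 0, 3, 2 }; the
   variable vpermilps uses only the low two bits of each control
   element, applied within that element's own 128-bit lane.  */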
30288 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30289 in terms of pshufb or vpperm. */
30292 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30294 unsigned i, nelt, eltsz;
30295 rtx rperm[16], vperm, target, op0, op1;
30297 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30298 return false;
30299 if (GET_MODE_SIZE (d->vmode) != 16)
30300 return false;
30306 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30308 for (i = 0; i < nelt; ++i)
30310 unsigned j, e = d->perm[i];
30311 for (j = 0; j < eltsz; ++j)
30312 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30315 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30316 vperm = force_reg (V16QImode, vperm);
30318 target = gen_lowpart (V16QImode, d->target);
30319 op0 = gen_lowpart (V16QImode, d->op0);
30320 if (d->op0 == d->op1)
30321 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30322 else
30323 {
30324 op1 = gen_lowpart (V16QImode, d->op1);
30325 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30326 }
30328 return true;
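/* E.g. for V4SImode with perm = { 1, 0, 3, 2 }, eltsz is 4 and the
   byte-wise control vector becomes
     { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 },
   each dword index expanded into its four constituent byte indices.  */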
30331 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30332 in a single instruction. */
30335 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30337 unsigned i, nelt = d->nelt;
30338 unsigned char perm2[MAX_VECT_LEN];
30340 /* Check plain VEC_SELECT first, because AVX has instructions that could
30341 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30342 input where SEL+CONCAT may not. */
30343 if (d->op0 == d->op1)
30345 int mask = nelt - 1;
30347 for (i = 0; i < nelt; i++)
30348 perm2[i] = d->perm[i] & mask;
30350 if (expand_vselect (d->target, d->op0, perm2, nelt))
30351 return true;
30353 /* There are plenty of patterns in sse.md that are written for
30354 SEL+CONCAT and are not replicated for a single op. Perhaps
30355 that should be changed, to avoid the nastiness here. */
30357 /* Recognize interleave style patterns, which means incrementing
30358 every other permutation operand. */
30359 for (i = 0; i < nelt; i += 2)
30361 perm2[i] = d->perm[i] & mask;
30362 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30364 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30365 return true;
30367 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30370 for (i = 0; i < nelt; i += 4)
30372 perm2[i + 0] = d->perm[i + 0] & mask;
30373 perm2[i + 1] = d->perm[i + 1] & mask;
30374 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30375 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30378 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30379 return true;
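/* E.g. V4SFmode perm = { 0, 2, 1, 3 } yields perm2 = { 0, 2, 5, 7 };
   selecting those elements from (vec_concat op0 op0) corresponds to
   shufps $0xd8 with both source operands equal.  */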
30383 /* Finally, try the fully general two operand permute. */
30384 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30385 return true;
30387 /* Recognize interleave style patterns with reversed operands. */
30388 if (d->op0 != d->op1)
30390 for (i = 0; i < nelt; ++i)
30391 {
30392 unsigned e = d->perm[i];
30393 if (e >= nelt)
30394 e -= nelt;
30395 else
30396 e += nelt;
30397 perm2[i] = e;
30398 }
30400 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30401 return true;
30404 /* Try the SSE4.1 blend variable merge instructions. */
30405 if (expand_vec_perm_blend (d))
30406 return true;
30408 /* Try one of the AVX vpermil variable permutations. */
30409 if (expand_vec_perm_vpermil (d))
30410 return true;
30412 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30413 if (expand_vec_perm_pshufb (d))
30414 return true;
30416 return false;
30419 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30420 in terms of a pair of pshuflw + pshufhw instructions. */
30423 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30425 unsigned char perm2[MAX_VECT_LEN];
30426 unsigned i;
30427 bool ok;
30429 if (d->vmode != V8HImode || d->op0 != d->op1)
30430 return false;
30432 /* The two permutations only operate in 64-bit lanes. */
30433 for (i = 0; i < 4; ++i)
30434 if (d->perm[i] >= 4)
30435 return false;
30436 for (i = 4; i < 8; ++i)
30437 if (d->perm[i] < 4)
30438 return false;
30443 /* Emit the pshuflw. */
30444 memcpy (perm2, d->perm, 4);
30445 for (i = 4; i < 8; ++i)
30446 perm2[i] = i;
30447 ok = expand_vselect (d->target, d->op0, perm2, 8);
30448 gcc_assert (ok);
30450 /* Emit the pshufhw. */
30451 memcpy (perm2 + 4, d->perm + 4, 4);
30452 for (i = 0; i < 4; ++i)
30453 perm2[i] = i;
30454 ok = expand_vselect (d->target, d->target, perm2, 8);
30455 gcc_assert (ok);
30457 return true;
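/* E.g. perm = { 2, 0, 3, 1, 6, 4, 7, 5 }: the first vselect uses
   { 2, 0, 3, 1, 4, 5, 6, 7 } (pshuflw; high quadword untouched), the
   second { 0, 1, 2, 3, 6, 4, 7, 5 } (pshufhw; low quadword untouched).  */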
30460 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30461 the permutation using the SSSE3 palignr instruction. This succeeds
30462 when all of the elements in PERM fit within one vector and we merely
30463 need to shift them down so that a single vector permutation has a
30464 chance to succeed. */
30467 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30469 unsigned i, nelt = d->nelt;
30470 unsigned min, max;
30471 bool in_order, ok;
30472 rtx shift;
30474 /* Even with AVX, palignr only operates on 128-bit vectors. */
30475 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30476 return false;
30478 min = nelt, max = 0;
30479 for (i = 0; i < nelt; ++i)
30481 unsigned e = d->perm[i];
30487 if (min == 0 || max - min >= nelt)
30488 return false;
30490 /* Given that we have SSSE3, we know we'll be able to implement the
30491 single operand permutation after the palignr with pshufb. */
30495 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30496 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30497 gen_lowpart (TImode, d->op1),
30498 gen_lowpart (TImode, d->op0), shift));
30500 d->op0 = d->op1 = d->target;
30503 for (i = 0; i < nelt; ++i)
30505 unsigned e = d->perm[i] - min;
30511 /* Test for the degenerate case where the alignment by itself
30512 produces the desired permutation. */
30516 ok = expand_vec_perm_1 (d);
30517 gcc_assert (ok);
30519 return ok;
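/* E.g. V8HImode perm = { 3, 4, 5, 6, 7, 8, 9, 10 }: min is 3, so the
   palignr shifts the (op1:op0) pair down by 3 elements and the residual
   permutation { 0, 1, ..., 7 } is the identity -- exactly the
   degenerate case tested above.  */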
30522 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30523 a two vector permutation into a single vector permutation by using
30524 an interleave operation to merge the vectors. */
30527 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30529 struct expand_vec_perm_d dremap, dfinal;
30530 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30531 unsigned contents, h1, h2, h3, h4;
30532 unsigned char remap[2 * MAX_VECT_LEN];
30533 rtx seq;
30534 bool ok;
30536 if (d->op0 == d->op1)
30537 return false;
30539 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30540 lanes. We can use similar techniques with the vperm2f128 instruction,
30541 but it requires slightly different logic. */
30542 if (GET_MODE_SIZE (d->vmode) != 16)
30543 return false;
30545 /* Examine from whence the elements come. */
30546 contents = 0;
30547 for (i = 0; i < nelt; ++i)
30548 contents |= 1u << d->perm[i];
30550 /* Split the two input vectors into 4 halves. */
30551 h1 = (1u << nelt2) - 1;
30552 h2 = h1 << nelt2;
30553 h3 = h2 << nelt2;
30554 h4 = h3 << nelt2;
30556 memset (remap, 0xff, sizeof (remap));
30557 dremap = *d;
30559 /* If the elements are all from the low halves, use interleave low; similarly
30560 for interleave high. If the elements come from mis-matched halves, we
30561 can use shufps for V4SF/V4SI or do a DImode shuffle. */
30562 if ((contents & (h1 | h3)) == contents)
30564 for (i = 0; i < nelt2; ++i)
30566 remap[i] = i * 2;
30567 remap[i + nelt] = i * 2 + 1;
30568 dremap.perm[i * 2] = i;
30569 dremap.perm[i * 2 + 1] = i + nelt;
30572 else if ((contents & (h2 | h4)) == contents)
30574 for (i = 0; i < nelt2; ++i)
30576 remap[i + nelt2] = i * 2;
30577 remap[i + nelt + nelt2] = i * 2 + 1;
30578 dremap.perm[i * 2] = i + nelt2;
30579 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30582 else if ((contents & (h1 | h4)) == contents)
30584 for (i = 0; i < nelt2; ++i)
30586 remap[i] = i;
30587 remap[i + nelt + nelt2] = i + nelt2;
30588 dremap.perm[i] = i;
30589 dremap.perm[i + nelt2] = i + nelt + nelt2;
30593 dremap.vmode = V2DImode;
30594 dremap.nelt = 2;
30595 dremap.perm[0] = 0;
30596 dremap.perm[1] = 3;
30599 else if ((contents & (h2 | h3)) == contents)
30601 for (i = 0; i < nelt2; ++i)
30603 remap[i + nelt2] = i;
30604 remap[i + nelt] = i + nelt2;
30605 dremap.perm[i] = i + nelt2;
30606 dremap.perm[i + nelt2] = i + nelt;
30610 dremap.vmode = V2DImode;
30611 dremap.nelt = 2;
30612 dremap.perm[0] = 1;
30613 dremap.perm[1] = 2;
30619 /* Use the remapping array set up above to move the elements from their
30620 swizzled locations into their final destinations. */
30621 dfinal = *d;
30622 for (i = 0; i < nelt; ++i)
30624 unsigned e = remap[d->perm[i]];
30625 gcc_assert (e < nelt);
30626 dfinal.perm[i] = e;
30628 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30629 dfinal.op1 = dfinal.op0;
30630 dremap.target = dfinal.op0;
30632 /* Test if the final remap can be done with a single insn. For V4SFmode or
30633 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30634 start_sequence ();
30635 ok = expand_vec_perm_1 (&dfinal);
30636 seq = get_insns ();
30637 end_sequence ();
30639 if (!ok)
30640 return false;
30642 if (dremap.vmode != dfinal.vmode)
30644 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30645 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30646 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30649 ok = expand_vec_perm_1 (&dremap);
30650 gcc_assert (ok);
30652 emit_insn (seq);
30653 return true;
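/* Worked example: V4SFmode perm = { 0, 1, 4, 5 }.  contents is 0x33,
   matching (h1 | h3), so dremap becomes the interleave-low pattern
   { 0, 4, 1, 5 } (unpcklps) and the remap array turns the original
   permutation into dfinal.perm = { 0, 2, 1, 3 }, which
   expand_vec_perm_1 handles with a single shufps/pshufd.  */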
30656 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30657 permutation with two pshufb insns and an ior. We should have already
30658 failed all two instruction sequences. */
30661 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30663 rtx rperm[2][16], vperm, l, h, op, m128;
30664 unsigned int i, nelt, eltsz;
30666 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30667 return false;
30668 gcc_assert (d->op0 != d->op1);
30670 nelt = d->nelt;
30671 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30673 /* Generate two permutation masks. If the required element is within
30674 the given vector it is shuffled into the proper lane. If the required
30675 element is in the other vector, force a zero into the lane by setting
30676 bit 7 in the permutation mask. */
30677 m128 = GEN_INT (-128);
30678 for (i = 0; i < nelt; ++i)
30680 unsigned j, e = d->perm[i];
30681 unsigned which = (e >= nelt);
30683 if (which)
30684 e -= nelt;
30685 for (j = 0; j < eltsz; ++j)
30687 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30688 rperm[1-which][i*eltsz + j] = m128;
30692 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30693 vperm = force_reg (V16QImode, vperm);
30695 l = gen_reg_rtx (V16QImode);
30696 op = gen_lowpart (V16QImode, d->op0);
30697 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30699 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30700 vperm = force_reg (V16QImode, vperm);
30702 h = gen_reg_rtx (V16QImode);
30703 op = gen_lowpart (V16QImode, d->op1);
30704 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30706 op = gen_lowpart (V16QImode, d->target);
30707 emit_insn (gen_iorv16qi3 (op, l, h));
30709 return true;
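/* Worked example: a V8HImode even extraction, perm = { 0, 2, 4, 6, 8,
   10, 12, 14 }.  The op0 control keeps bytes { 0,1, 4,5, 8,9, 12,13 }
   in the low half and is -128 (zeroing) elsewhere; the op1 control is
   its mirror image, and the final ior merges the two half-results.  */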
30712 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30713 and extract-odd permutations. */
30716 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30718 rtx t1, t2, t3, t4;
30720 switch (d->vmode)
30721 {
30722 case V4DFmode:
30723 t1 = gen_reg_rtx (V4DFmode);
30724 t2 = gen_reg_rtx (V4DFmode);
30726 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30727 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30728 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30730 /* Now an unpck[lh]pd will produce the result required. */
30731 if (odd)
30732 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30733 else
30734 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30735 emit_insn (t3);
30736 break;
30738 case V8SFmode:
30740 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30741 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30742 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30744 t1 = gen_reg_rtx (V8SFmode);
30745 t2 = gen_reg_rtx (V8SFmode);
30746 t3 = gen_reg_rtx (V8SFmode);
30747 t4 = gen_reg_rtx (V8SFmode);
30749 /* Shuffle within the 128-bit lanes to produce:
30750 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30751 expand_vselect (t1, d->op0, perm1, 8);
30752 expand_vselect (t2, d->op1, perm1, 8);
30754 /* Shuffle the lanes around to produce:
30755 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30756 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30757 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30759 /* Now a vpermil2p will produce the result required. */
30760 /* ??? The vpermil2p requires a vector constant. Another option
30761 is an unpck[lh]ps to merge the two vectors to produce
30762 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30763 vpermilps to get the elements into the final order. */
30766 memcpy (d->perm, odd ? permo : perme, 8);
30767 expand_vec_perm_vpermil (d);
30775 /* These are always directly implementable by expand_vec_perm_1. */
30776 gcc_unreachable ();
30778 case V8HImode:
30779 if (TARGET_SSSE3)
30780 return expand_vec_perm_pshufb2 (d);
30783 /* We need 2*log2(N)-1 operations to achieve odd/even
30784 with interleave. */
30785 t1 = gen_reg_rtx (V8HImode);
30786 t2 = gen_reg_rtx (V8HImode);
30787 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30788 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30789 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30790 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30791 if (odd)
30792 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30793 else
30794 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30795 emit_insn (t3);
30797 break;
30799 case V16QImode:
30800 if (TARGET_SSSE3)
30801 return expand_vec_perm_pshufb2 (d);
30804 t1 = gen_reg_rtx (V16QImode);
30805 t2 = gen_reg_rtx (V16QImode);
30806 t3 = gen_reg_rtx (V16QImode);
30807 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30808 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30809 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30810 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30811 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30812 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30813 if (odd)
30814 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30815 else
30816 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30817 emit_insn (t3);
30819 break;
30821 default:
30822 gcc_unreachable ();
30828 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30829 extract-even and extract-odd permutations. */
30832 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30834 unsigned i, odd, nelt = d->nelt;
30836 odd = d->perm[0];
30837 if (odd != 0 && odd != 1)
30838 return false;
30840 for (i = 1; i < nelt; ++i)
30841 if (d->perm[i] != 2 * i + odd)
30842 return false;
30844 return expand_vec_perm_even_odd_1 (d, odd);
30847 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
30848 permutations. We assume that expand_vec_perm_1 has already failed. */
30851 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
30853 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
30854 enum machine_mode vmode = d->vmode;
30855 unsigned char perm2[4];
30856 rtx op0 = d->op0;
30857 bool ok;
30859 switch (vmode)
30860 {
30861 case V4DFmode:
30862 case V8SFmode:
30863 /* These are special-cased in sse.md so that we can optionally
30864 use the vbroadcast instruction. They expand to two insns
30865 if the input happens to be in a register. */
30866 gcc_unreachable ();
30868 case V2DFmode:
30869 case V4SFmode:
30870 case V2DImode:
30871 case V4SImode:
30872 /* These are always implementable using standard shuffle patterns. */
30873 gcc_unreachable ();
30875 case V8HImode:
30876 case V16QImode:
30877 /* These can be implemented via interleave. We save one insn by
30878 stopping once we have promoted to V4SImode and then use pshufd. */
30879 do
30880 {
30881 optab otab = vec_interleave_low_optab;
30883 if (elt >= nelt2)
30884 {
30885 otab = vec_interleave_high_optab;
30886 elt -= nelt2;
30887 }
30888 nelt2 /= 2;
30890 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
30891 vmode = get_mode_wider_vector (vmode);
30892 op0 = gen_lowpart (vmode, op0);
30894 while (vmode != V4SImode);
30896 memset (perm2, elt, 4);
30897 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
30898 gcc_assert (ok);
30899 return true;
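/* E.g. broadcasting element 5 of a V8HImode vector: the element lies in
   the high half, so a single interleave-high pairs it with itself,
   giving a V4SImode vector whose element 1 holds the doubled value; the
   closing pshufd { 1, 1, 1, 1 } then replicates it everywhere.  */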
30901 default:
30902 gcc_unreachable ();
30906 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30907 broadcast permutations. */
30910 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30912 unsigned i, elt, nelt = d->nelt;
30914 if (d->op0 != d->op1)
30915 return false;
30917 elt = d->perm[0];
30918 for (i = 1; i < nelt; ++i)
30919 if (d->perm[i] != elt)
30920 return false;
30922 return expand_vec_perm_broadcast_1 (d);
30925 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30926 With all of the interface bits taken care of, perform the expansion
30927 in D and return true on success. */
30930 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30932 /* Try a single instruction expansion. */
30933 if (expand_vec_perm_1 (d))
30934 return true;
30936 /* Try sequences of two instructions. */
30938 if (expand_vec_perm_pshuflw_pshufhw (d))
30939 return true;
30941 if (expand_vec_perm_palignr (d))
30942 return true;
30944 if (expand_vec_perm_interleave2 (d))
30945 return true;
30947 if (expand_vec_perm_broadcast (d))
30948 return true;
30950 /* Try sequences of three instructions. */
30952 if (expand_vec_perm_pshufb2 (d))
30953 return true;
30955 /* ??? Look for narrow permutations whose element orderings would
30956 allow the promotion to a wider mode. */
30958 /* ??? Look for sequences of interleave or a wider permute that place
30959 the data into the correct lanes for a half-vector shuffle like
30960 pshuf[lh]w or vpermilps. */
30962 /* ??? Look for sequences of interleave that produce the desired results.
30963 The combinatorics of punpck[lh] get pretty ugly... */
30965 if (expand_vec_perm_even_odd (d))
30966 return true;
30968 return false;
30971 /* Extract the values from the vector CST into the permutation array in D.
30972 Return 0 on error, 1 if all values from the permutation come from the
30973 first vector, 2 if all values from the second vector, and 3 otherwise. */
30976 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
30978 tree list = TREE_VECTOR_CST_ELTS (cst);
30979 unsigned i, nelt = d->nelt;
30980 int ret = 0;
30982 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
30984 unsigned HOST_WIDE_INT e;
30986 if (!host_integerp (TREE_VALUE (list), 1))
30987 return 0;
30988 e = tree_low_cst (TREE_VALUE (list), 1);
30989 if (e >= 2 * nelt)
30990 return 0;
30992 ret |= (e < nelt ? 1 : 2);
30993 d->perm[i] = e;
30995 gcc_assert (list == NULL);
30997 /* If all elements come from the second vector, fold them to index the first. */
30998 if (ret == 2)
30999 for (i = 0; i < nelt; ++i)
31000 d->perm[i] -= nelt;
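/* E.g. a V4SImode mask of { 4, 5, 6, 7 } draws only on the second
   vector: the function returns 2 and the indices are folded to
   { 0, 1, 2, 3 }, letting callers treat the permutation as
   single-operand.  */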
31006 ix86_expand_vec_perm_builtin (tree exp)
31008 struct expand_vec_perm_d d;
31009 tree arg0, arg1, arg2;
31011 arg0 = CALL_EXPR_ARG (exp, 0);
31012 arg1 = CALL_EXPR_ARG (exp, 1);
31013 arg2 = CALL_EXPR_ARG (exp, 2);
31015 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31016 d.nelt = GET_MODE_NUNITS (d.vmode);
31017 d.testing_p = false;
31018 gcc_assert (VECTOR_MODE_P (d.vmode));
31020 if (TREE_CODE (arg2) != VECTOR_CST)
31022 error_at (EXPR_LOCATION (exp),
31023 "vector permutation requires vector constant");
31027 switch (extract_vec_perm_cst (&d, arg2))
31033 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31037 if (!operand_equal_p (arg0, arg1, 0))
31039 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31040 d.op0 = force_reg (d.vmode, d.op0);
31041 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31042 d.op1 = force_reg (d.vmode, d.op1);
31046 /* The elements of PERM do not suggest that only the first operand
31047 is used, but both operands are identical. Allow easier matching
31048 of the permutation by folding the permutation into the single
31049 input register. */
31051 unsigned i, nelt = d.nelt;
31052 for (i = 0; i < nelt; ++i)
31053 if (d.perm[i] >= nelt)
31059 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31060 d.op0 = force_reg (d.vmode, d.op0);
31065 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31066 d.op0 = force_reg (d.vmode, d.op0);
31071 d.target = gen_reg_rtx (d.vmode);
31072 if (ix86_expand_vec_perm_builtin_1 (&d))
31073 return d.target;
31075 /* For compiler generated permutations, we should never get here, because
31076 the compiler should also be checking the ok hook. But since this is a
31077 builtin the user has access to, don't abort. */
31081 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31084 sorry ("vector permutation (%d %d %d %d)",
31085 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31088 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31089 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31090 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31093 sorry ("vector permutation "
31094 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31095 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31096 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31097 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31098 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31101 gcc_unreachable ();
31104 return CONST0_RTX (d.vmode);
31107 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31110 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31112 struct expand_vec_perm_d d;
31113 unsigned int vec_mask;
31114 bool ret, one_vec;
31116 d.vmode = TYPE_MODE (vec_type);
31117 d.nelt = GET_MODE_NUNITS (d.vmode);
31118 d.testing_p = true;
31120 /* Given sufficient ISA support we can just return true here
31121 for selected vector modes. */
31122 if (GET_MODE_SIZE (d.vmode) == 16)
31124 /* All implementable with a single vpperm insn. */
31125 if (TARGET_XOP)
31126 return true;
31127 /* All implementable with 2 pshufb + 1 ior. */
31128 if (TARGET_SSSE3)
31129 return true;
31130 /* All implementable with shufpd or unpck[lh]pd. */
31131 if (d.nelt == 2)
31132 return true;
31135 vec_mask = extract_vec_perm_cst (&d, mask);
31137 /* This hook cannot be called in response to something that the
31138 user does (unlike the builtin expander), so we should never see
31139 an error generated from the extract. */
31140 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31141 one_vec = (vec_mask != 3);
31143 /* Implementable with shufps or pshufd. */
31144 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31145 return true;
31147 /* Otherwise we have to go through the motions and see if we can
31148 figure out how to generate the requested permutation. */
31149 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31150 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31151 if (!one_vec)
31152 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31154 start_sequence ();
31155 ret = ix86_expand_vec_perm_builtin_1 (&d);
31156 end_sequence ();
31158 return ret;
31162 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31164 struct expand_vec_perm_d d;
31165 unsigned i, nelt;
31167 d.target = targ;
31168 d.op0 = op0;
31169 d.op1 = op1;
31170 d.vmode = GET_MODE (targ);
31171 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31172 d.testing_p = false;
31174 for (i = 0; i < nelt; ++i)
31175 d.perm[i] = i * 2 + odd;
31177 /* We'll either be able to implement the permutation directly... */
31178 if (expand_vec_perm_1 (&d))
31179 return;
31181 /* ... or we use the special-case patterns. */
31182 expand_vec_perm_even_odd_1 (&d, odd);
31185 /* Return the calling-ABI-specific va_list type node appropriate
31186 for FNDECL. */
31189 ix86_fn_abi_va_list (tree fndecl)
31191 if (!TARGET_64BIT)
31192 return va_list_type_node;
31193 gcc_assert (fndecl != NULL_TREE);
31195 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31196 return ms_va_list_type_node;
31198 return sysv_va_list_type_node;
31201 /* Returns the canonical va_list type specified by TYPE. If there
31202 is no valid TYPE provided, it returns NULL_TREE. */
31205 ix86_canonical_va_list_type (tree type)
31207 tree wtype, htype;
31209 /* Resolve references and pointers to va_list type. */
31210 if (TREE_CODE (type) == MEM_REF)
31211 type = TREE_TYPE (type);
31212 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
31213 type = TREE_TYPE (type);
31214 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31215 type = TREE_TYPE (type);
31217 if (TARGET_64BIT)
31218 {
31219 wtype = va_list_type_node;
31220 gcc_assert (wtype != NULL_TREE);
31221 htype = type;
31222 if (TREE_CODE (wtype) == ARRAY_TYPE)
31224 /* If va_list is an array type, the argument may have decayed
31225 to a pointer type, e.g. by being passed to another function.
31226 In that case, unwrap both types so that we can compare the
31227 underlying records. */
31228 if (TREE_CODE (htype) == ARRAY_TYPE
31229 || POINTER_TYPE_P (htype))
31231 wtype = TREE_TYPE (wtype);
31232 htype = TREE_TYPE (htype);
31235 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31236 return va_list_type_node;
31237 wtype = sysv_va_list_type_node;
31238 gcc_assert (wtype != NULL_TREE);
31239 htype = type;
31240 if (TREE_CODE (wtype) == ARRAY_TYPE)
31242 /* If va_list is an array type, the argument may have decayed
31243 to a pointer type, e.g. by being passed to another function.
31244 In that case, unwrap both types so that we can compare the
31245 underlying records. */
31246 if (TREE_CODE (htype) == ARRAY_TYPE
31247 || POINTER_TYPE_P (htype))
31249 wtype = TREE_TYPE (wtype);
31250 htype = TREE_TYPE (htype);
31253 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31254 return sysv_va_list_type_node;
31255 wtype = ms_va_list_type_node;
31256 gcc_assert (wtype != NULL_TREE);
31257 htype = type;
31258 if (TREE_CODE (wtype) == ARRAY_TYPE)
31260 /* If va_list is an array type, the argument may have decayed
31261 to a pointer type, e.g. by being passed to another function.
31262 In that case, unwrap both types so that we can compare the
31263 underlying records. */
31264 if (TREE_CODE (htype) == ARRAY_TYPE
31265 || POINTER_TYPE_P (htype))
31267 wtype = TREE_TYPE (wtype);
31268 htype = TREE_TYPE (htype);
31271 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31272 return ms_va_list_type_node;
31275 return std_canonical_va_list_type (type);
31278 /* Iterate through the target-specific builtin types for va_list.
31279 IDX denotes the iterator, *PTREE is set to the result type of
31280 the va_list builtin, and *PNAME to its internal name.
31281 Returns zero if there is no element for this index, otherwise
31282 IDX should be increased upon the next call.
31283 Note: do not enumerate a base builtin's name like __builtin_va_list.
31284 Used from c_common_nodes_and_builtins. */
31287 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31297 *ptree = ms_va_list_type_node;
31298 *pname = "__builtin_ms_va_list";
31302 *ptree = sysv_va_list_type_node;
31303 *pname = "__builtin_sysv_va_list";
31311 /* Initialize the GCC target structure. */
31312 #undef TARGET_RETURN_IN_MEMORY
31313 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31315 #undef TARGET_LEGITIMIZE_ADDRESS
31316 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31318 #undef TARGET_ATTRIBUTE_TABLE
31319 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31320 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31321 # undef TARGET_MERGE_DECL_ATTRIBUTES
31322 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31323 #endif
31325 #undef TARGET_COMP_TYPE_ATTRIBUTES
31326 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31328 #undef TARGET_INIT_BUILTINS
31329 #define TARGET_INIT_BUILTINS ix86_init_builtins
31330 #undef TARGET_BUILTIN_DECL
31331 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31332 #undef TARGET_EXPAND_BUILTIN
31333 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31335 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31336 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31337 ix86_builtin_vectorized_function
31339 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31340 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31342 #undef TARGET_BUILTIN_RECIPROCAL
31343 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31345 #undef TARGET_ASM_FUNCTION_EPILOGUE
31346 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31348 #undef TARGET_ENCODE_SECTION_INFO
31349 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31350 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31351 #else
31352 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31353 #endif
31355 #undef TARGET_ASM_OPEN_PAREN
31356 #define TARGET_ASM_OPEN_PAREN ""
31357 #undef TARGET_ASM_CLOSE_PAREN
31358 #define TARGET_ASM_CLOSE_PAREN ""
31360 #undef TARGET_ASM_BYTE_OP
31361 #define TARGET_ASM_BYTE_OP ASM_BYTE
31363 #undef TARGET_ASM_ALIGNED_HI_OP
31364 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31365 #undef TARGET_ASM_ALIGNED_SI_OP
31366 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31367 #ifdef ASM_QUAD
31368 #undef TARGET_ASM_ALIGNED_DI_OP
31369 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31370 #endif
31372 #undef TARGET_ASM_UNALIGNED_HI_OP
31373 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31374 #undef TARGET_ASM_UNALIGNED_SI_OP
31375 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31376 #undef TARGET_ASM_UNALIGNED_DI_OP
31377 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31379 #undef TARGET_PRINT_OPERAND
31380 #define TARGET_PRINT_OPERAND ix86_print_operand
31381 #undef TARGET_PRINT_OPERAND_ADDRESS
31382 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31383 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31384 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31386 #undef TARGET_SCHED_ADJUST_COST
31387 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31388 #undef TARGET_SCHED_ISSUE_RATE
31389 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31390 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31391 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31392 ia32_multipass_dfa_lookahead
31394 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31395 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31398 #undef TARGET_HAVE_TLS
31399 #define TARGET_HAVE_TLS true
31401 #undef TARGET_CANNOT_FORCE_CONST_MEM
31402 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31403 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31404 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31406 #undef TARGET_DELEGITIMIZE_ADDRESS
31407 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31409 #undef TARGET_MS_BITFIELD_LAYOUT_P
31410 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31412 #if TARGET_MACHO
31413 #undef TARGET_BINDS_LOCAL_P
31414 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31415 #endif
31416 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31417 #undef TARGET_BINDS_LOCAL_P
31418 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31419 #endif
31421 #undef TARGET_ASM_OUTPUT_MI_THUNK
31422 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31423 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31424 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31426 #undef TARGET_ASM_FILE_START
31427 #define TARGET_ASM_FILE_START x86_file_start
31429 #undef TARGET_DEFAULT_TARGET_FLAGS
31430 #define TARGET_DEFAULT_TARGET_FLAGS \
31431 (TARGET_DEFAULT \
31432 | TARGET_SUBTARGET_DEFAULT \
31433 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31436 #undef TARGET_HANDLE_OPTION
31437 #define TARGET_HANDLE_OPTION ix86_handle_option
31439 #undef TARGET_REGISTER_MOVE_COST
31440 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31441 #undef TARGET_MEMORY_MOVE_COST
31442 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31443 #undef TARGET_RTX_COSTS
31444 #define TARGET_RTX_COSTS ix86_rtx_costs
31445 #undef TARGET_ADDRESS_COST
31446 #define TARGET_ADDRESS_COST ix86_address_cost
31448 #undef TARGET_FIXED_CONDITION_CODE_REGS
31449 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31450 #undef TARGET_CC_MODES_COMPATIBLE
31451 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31453 #undef TARGET_MACHINE_DEPENDENT_REORG
31454 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31456 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31457 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31459 #undef TARGET_BUILD_BUILTIN_VA_LIST
31460 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31462 #undef TARGET_ENUM_VA_LIST_P
31463 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31465 #undef TARGET_FN_ABI_VA_LIST
31466 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31468 #undef TARGET_CANONICAL_VA_LIST_TYPE
31469 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31471 #undef TARGET_EXPAND_BUILTIN_VA_START
31472 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31474 #undef TARGET_MD_ASM_CLOBBERS
31475 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31477 #undef TARGET_PROMOTE_PROTOTYPES
31478 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31479 #undef TARGET_STRUCT_VALUE_RTX
31480 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31481 #undef TARGET_SETUP_INCOMING_VARARGS
31482 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31483 #undef TARGET_MUST_PASS_IN_STACK
31484 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31485 #undef TARGET_FUNCTION_ARG_ADVANCE
31486 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31487 #undef TARGET_FUNCTION_ARG
31488 #define TARGET_FUNCTION_ARG ix86_function_arg
31489 #undef TARGET_PASS_BY_REFERENCE
31490 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31491 #undef TARGET_INTERNAL_ARG_POINTER
31492 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31493 #undef TARGET_UPDATE_STACK_BOUNDARY
31494 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31495 #undef TARGET_GET_DRAP_RTX
31496 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31497 #undef TARGET_STRICT_ARGUMENT_NAMING
31498 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31499 #undef TARGET_STATIC_CHAIN
31500 #define TARGET_STATIC_CHAIN ix86_static_chain
31501 #undef TARGET_TRAMPOLINE_INIT
31502 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31503 #undef TARGET_RETURN_POPS_ARGS
31504 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31506 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31507 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31509 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31510 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31512 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31513 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31515 #undef TARGET_C_MODE_FOR_SUFFIX
31516 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31518 #ifdef HAVE_AS_TLS
31519 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31520 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31521 #endif
31523 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31524 #undef TARGET_INSERT_ATTRIBUTES
31525 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31526 #endif
31528 #undef TARGET_MANGLE_TYPE
31529 #define TARGET_MANGLE_TYPE ix86_mangle_type
31531 #undef TARGET_STACK_PROTECT_FAIL
31532 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31534 #undef TARGET_FUNCTION_VALUE
31535 #define TARGET_FUNCTION_VALUE ix86_function_value
31537 #undef TARGET_FUNCTION_VALUE_REGNO_P
31538 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31540 #undef TARGET_SECONDARY_RELOAD
31541 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31543 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31544 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31545 ix86_builtin_vectorization_cost
31546 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31547 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31548 ix86_vectorize_builtin_vec_perm
31549 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31550 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31551 ix86_vectorize_builtin_vec_perm_ok
31553 #undef TARGET_SET_CURRENT_FUNCTION
31554 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31556 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31557 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31559 #undef TARGET_OPTION_SAVE
31560 #define TARGET_OPTION_SAVE ix86_function_specific_save
31562 #undef TARGET_OPTION_RESTORE
31563 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31565 #undef TARGET_OPTION_PRINT
31566 #define TARGET_OPTION_PRINT ix86_function_specific_print
31568 #undef TARGET_CAN_INLINE_P
31569 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31571 #undef TARGET_EXPAND_TO_RTL_HOOK
31572 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31574 #undef TARGET_LEGITIMATE_ADDRESS_P
31575 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31577 #undef TARGET_IRA_COVER_CLASSES
31578 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31580 #undef TARGET_FRAME_POINTER_REQUIRED
31581 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31583 #undef TARGET_CAN_ELIMINATE
31584 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31586 #undef TARGET_ASM_CODE_END
31587 #define TARGET_ASM_CODE_END ix86_code_end
31589 struct gcc_target targetm = TARGET_INITIALIZER;
31591 #include "gt-i386.h"