/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
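/* Usage sketch (not from the original file; the `mult_init' field name is
   an assumption matching the processor_costs tables this file fills in):

     int qi_mul_cost = ix86_cost->mult_init[MODE_INDEX (QImode)];

   Modes other than QI/HI/SI/DImode fall through to index 4, the "other"
   slot of the five-entry cost arrays below.  */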
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
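/* For scale: with COSTS_N_INSNS (N) == (N) * 4, we get
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add in the
   size-tuning table below costs exactly one add on the common scale.  */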
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
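/* A reading aid for the stringop tables below (a sketch, assuming the
   stringop_algs layout these initializers fill in): each descriptor is

     {unknown_size_alg, {{max, alg}, ...}}

   where the leading algorithm handles sizes not known at compile time,
   each {max, alg} pair handles known sizes up to MAX, and max == -1
   means "no upper bound".  Each cost table carries two descriptors per
   operation (memcpy first, then memset), one for 32-bit and one for
   64-bit code; DUMMY_STRINGOP_ALGS fills the slot a tuning never uses.  */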
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),   /* cost of an add instruction */
  COSTS_N_BYTES (3),   /* cost of a lea instruction */
  COSTS_N_BYTES (2),   /* variable shift costs */
  COSTS_N_BYTES (3),   /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3),   /* cost of movsx */
  COSTS_N_BYTES (3),   /* cost of movzx */
  2,                   /* cost for loading QImode using movzbl */
  {2, 2, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 2},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {2, 2, 2},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  3,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {3, 3},              /* cost of storing MMX registers
                          in SImode and DImode */
  3,                   /* cost of moving SSE register */
  {3, 3, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {3, 3, 3},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_BYTES (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),   /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),   /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),   /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),   /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  1,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  1,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {   /* 386 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1),   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  0,                   /* size of l1 cache */
  0,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs i486_cost = {   /* 486 specific costs */
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (3),   /* variable shift costs */
  COSTS_N_INSNS (2),   /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  15,                  /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {8, 8, 8},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {8, 8, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  4,                   /* size of l1 cache.  486 has 8kB cache
                          shared for code and data, so 4kB is
                          not really precise.  */
  4,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  6,                   /* cost for loading QImode using movzbl */
  {2, 4, 2},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 4, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  8,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 8, 16},          /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 8, 16},          /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  8,                   /* size of l2 cache */
  0,                   /* size of prefetch block */
  0,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 2, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache */
  32,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
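/* Illustrative sketch of how a descriptor like the one above is scanned
   for a compile-time-constant size.  This is a simplification, not the
   original logic (see decide_alg for the real dispatch), so it is kept
   under #if 0.  */
#if 0
static enum stringop_alg
sketch_decide_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  unsigned int i;
  /* Scan the {max, alg} pairs in order; max == -1 means "no bound".  */
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || size <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;  /* conservative fallback */
}
#endif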
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (2),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  1,                   /* cost for loading QImode using movzbl */
  {1, 1, 1},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {1, 1, 1},           /* cost of storing integer registers */
  1,                   /* cost of reg,reg fld/fst */
  {1, 1, 1},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 6, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  1,                   /* cost of moving MMX register */
  {1, 1},              /* cost of loading MMX registers
                          in SImode and DImode */
  {1, 1},              /* cost of storing MMX registers
                          in SImode and DImode */
  1,                   /* cost of moving SSE register */
  {1, 1, 1},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {1, 1, 1},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  1,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  128,                 /* size of l2 cache.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),   /* cost of movsx */
  COSTS_N_INSNS (2),   /* cost of movzx */
  8,                   /* "large" insn */
  3,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {2, 2, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  6,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  32,                  /* size of l2 cache.  Some models
                          have integrated l2 cache, but
                          optimizing for k6 is not important
                          enough to worry about that.  */
  32,                  /* size of prefetch block */
  1,                   /* number of parallel prefetches */
  COSTS_N_INSNS (2),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {4, 4},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
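/* Sketch (an assumption consistent with the table layout, not code from
   this file): the two stringop_algs per operation are selected by word
   size, e.g.

     const struct stringop_algs *algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

   which is why 32-bit-only tunings such as the one above leave the
   second slot as DUMMY_STRINGOP_ALGS.  */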
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 3, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,                 /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar_load_cost.  */
  2,                   /* scalar_store_cost.  */
  5,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  3,                   /* vec_unalign_load_cost.  */
  3,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  2,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
                       /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                          On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                            MOVD reg32, xmmreg Double FADD 3 */
  64,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,                 /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar_load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (2),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  4,                   /* cost for loading QImode using movzbl */
  {3, 4, 3},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {3, 4, 3},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {4, 4, 12},          /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {3, 3},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {4, 4, 3},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 5},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  3,                   /* MMX or SSE register to integer */
                       /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                          On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                            MOVD reg32, xmmreg Double FADD 3 */
  64,                  /* size of l1 cache.  */
  1024,                /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,                 /* number of parallel prefetches */
  COSTS_N_INSNS (4),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do non-temporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                   /* scalar_stmt_cost.  */
  2,                   /* scalar_load_cost.  */
  2,                   /* scalar_store_cost.  */
  6,                   /* vec_stmt_cost.  */
  0,                   /* vec_to_scalar_cost.  */
  2,                   /* scalar_to_vec_cost.  */
  2,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  2,                   /* vec_store_cost.  */
  2,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (3),   /* cost of a lea instruction */
  COSTS_N_INSNS (4),   /* variable shift costs */
  COSTS_N_INSNS (4),   /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  2,                   /* cost for loading QImode using movzbl */
  {4, 5, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {2, 3, 2},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {2, 2, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 6},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {2, 2},              /* cost of loading MMX registers
                          in SImode and DImode */
  {2, 2},              /* cost of storing MMX registers
                          in SImode and DImode */
  12,                  /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {2, 2, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  10,                  /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  COSTS_N_INSNS (5),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1),   /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  16,                  /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  3,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  6,                   /* cost of moving MMX register */
  {12, 12},            /* cost of loading MMX registers
                          in SImode and DImode */
  {12, 12},            /* cost of storing MMX registers
                          in SImode and DImode */
  6,                   /* cost of moving SSE register */
  {12, 12, 12},        /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {12, 12, 12},        /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  8,                   /* MMX or SSE register to integer */
  8,                   /* size of l1 cache.  */
  1024,                /* size of l2 cache.  */
  128,                 /* size of prefetch block */
  8,                   /* number of parallel prefetches */
  1,                   /* Branch cost */
  COSTS_N_INSNS (6),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
    {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22), /* HI */
   COSTS_N_INSNS (22), /* SI */
   COSTS_N_INSNS (22), /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  16,                  /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {6, 6, 6},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  2,                   /* cost of reg,reg fld/fst */
  {6, 6, 6},           /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {4, 4, 4},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {6, 6},              /* cost of loading MMX registers
                          in SImode and DImode */
  {4, 4},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {6, 6, 6},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {4, 4, 4},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  2,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  2048,                /* size of l2 cache.  */
  128,                 /* size of prefetch block */
  8,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (3),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  2,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  512,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),   /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),   /* variable shift costs */
  COSTS_N_INSNS (1),   /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0,                   /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),   /* cost of movsx */
  COSTS_N_INSNS (1),   /* cost of movzx */
  8,                   /* "large" insn */
  17,                  /* MOVE_RATIO */
  4,                   /* cost for loading QImode using movzbl */
  {4, 4, 4},           /* cost of loading integer registers
                          in QImode, HImode and SImode.
                          Relative to reg-reg move (2).  */
  {4, 4, 4},           /* cost of storing integer registers */
  4,                   /* cost of reg,reg fld/fst */
  {12, 12, 12},        /* cost of loading fp registers
                          in SFmode, DFmode and XFmode */
  {6, 6, 8},           /* cost of storing fp registers
                          in SFmode, DFmode and XFmode */
  2,                   /* cost of moving MMX register */
  {8, 8},              /* cost of loading MMX registers
                          in SImode and DImode */
  {8, 8},              /* cost of storing MMX registers
                          in SImode and DImode */
  2,                   /* cost of moving SSE register */
  {8, 8, 8},           /* cost of loading SSE registers
                          in SImode, DImode and TImode */
  {8, 8, 8},           /* cost of storing SSE registers
                          in SImode, DImode and TImode */
  5,                   /* MMX or SSE register to integer */
  32,                  /* size of l1 cache.  */
  256,                 /* size of l2 cache.  */
  64,                  /* size of prefetch block */
  6,                   /* number of parallel prefetches */
  3,                   /* Branch cost */
  COSTS_N_INSNS (8),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                   /* scalar_stmt_cost.  */
  1,                   /* scalar_load_cost.  */
  1,                   /* scalar_store_cost.  */
  1,                   /* vec_stmt_cost.  */
  1,                   /* vec_to_scalar_cost.  */
  1,                   /* scalar_to_vec_cost.  */
  1,                   /* vec_align_load_cost.  */
  2,                   /* vec_unalign_load_cost.  */
  1,                   /* vec_store_cost.  */
  3,                   /* cond_taken_branch_cost.  */
  1,                   /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
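/* Sketch (an assumption about surrounding code, not shown here): ix86_cost
   is re-pointed at option-override time from the per-processor table,
   roughly

     ix86_cost = processor_target_table[ix86_tune].cost;

   so &pentium_cost above is only the pre-override default.  */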
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
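/* Sketch of how the masks below become the boolean ix86_tune_features[]
   (this mirrors the option-override code; treat the exact statements as
   an assumption):

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/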
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
   register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
   might be considered for Generic32 if our scheme for avoiding partial
   stalls were more effective.  */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
1492 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
   conflict here between PPro/Pentium4-based chips that treat 128-bit
   SSE registers as single units and K8-based chips that divide SSE
   registers into two 64-bit halves.  This knob promotes all store
   destinations to be 128-bit to allow register renaming on 128-bit SSE
   units, but usually results in one extra microop on 64-bit SSE units.
   Experimental results show that disabling this option on P4 brings
   over a 20% SPECfp regression, while enabling it on K8 brings roughly
   a 2.4% regression that can be partly masked by careful scheduling of
   moves.  */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
   are resolved on SSE register parts instead of whole registers, so we may
   maintain just the lower part of scalar values in the proper format,
   leaving the upper part undefined.  */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
   than 4 branch instructions in a 16-byte window.  */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
   HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
   faster.  */
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
   a vector path on AMD machines.  */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path
   on AMD machines.  */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
/* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
   than a MOV.  */
/* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
   but is one byte longer.  */
/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
   operand that cannot be represented using a modRM byte.  The XOR
   replacement is long decoded, so this split helps here as well.  */
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
   from FP to FP.  */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1628 will impact LEA instruction selection. */
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
1667 #define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
1706 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1714 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1717 /* The "default" register map used in 64bit mode. */
1719 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
1727 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
particular, and even for DWARF regno 11, SDB only seems to understand
that it should say that a variable lives in %st(0) (when asked via an
`=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
register numbers.  Note that these are all stack-top-relative numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15)
1784 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1792 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
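/* Worked example of reading the map above (illustrative): gcc regno 1
   is %edx, and svr4_dbx_register_map[1] == 2 matches the SVR4
   numbering listed in the comment (2 for %edx); gcc regno 8 is %st(0)
   and maps to DWARF regno 11.  The DBX_REGISTER_NUMBER macro in i386.h
   is what performs this lookup.  */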
1795 /* Test and compare insns in i386.md store the information needed to
1796 generate branch and scc insns here. */
1798 rtx ix86_compare_op0 = NULL_RTX;
1799 rtx ix86_compare_op1 = NULL_RTX;
1801 /* Define parameter passing and return registers. */
1803 static int const x86_64_int_parameter_registers[6] =
1805 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1808 static int const x86_64_ms_abi_int_parameter_registers[4] =
1810 CX_REG, DX_REG, R8_REG, R9_REG
1813 static int const x86_64_int_return_registers[4] =
1815 AX_REG, DX_REG, DI_REG, SI_REG
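/* Example (illustrative, following the SysV AMD64 ABI): for a call
   f (int a, int b), a is passed in x86_64_int_parameter_registers[0]
   (DI_REG, i.e. %rdi) and b in x86_64_int_parameter_registers[1]
   (SI_REG, i.e. %rsi); an int result comes back in AX_REG (%rax).
   The MS ABI table instead starts at CX_REG (%rcx).  */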
1818 /* Define the structure for the machine field in struct function. */
1820 struct GTY(()) stack_local_entry {
1821 unsigned short mode;
struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
                                              <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
                                              <- HARD_FRAME_POINTER
   [saved regs]

   [padding0]

   [saved SSE regs]

   [padding1]          \
                        )
   [va_arg registers]  (
                        > to_allocate         <- FRAME_POINTER
   [frame]             (
                        )
   [padding2]          /
  */
struct ix86_frame
{
1858 HOST_WIDE_INT frame;
1860 int outgoing_arguments_size;
1862 HOST_WIDE_INT to_allocate;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1868 /* When save_regs_using_mov is set, emit prologue using
1869 move instead of push instructions. */
bool save_regs_using_mov;
};
1873 /* Code model option. */
1874 enum cmodel ix86_cmodel;
1876 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1878 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1880 /* Which unit we are generating floating point math for. */
1881 enum fpmath_unit ix86_fpmath;
1883 /* Which cpu are we scheduling for. */
1884 enum attr_cpu ix86_schedule;
1886 /* Which cpu are we optimizing for. */
1887 enum processor_type ix86_tune;
1889 /* Which instruction set architecture to use. */
1890 enum processor_type ix86_arch;
/* True if the SSE prefetch instruction is not a NOP.  */
1893 int x86_prefetch_sse;
1895 /* ix86_regparm_string as a number */
1896 static int ix86_regparm;
1898 /* -mstackrealign option */
1899 extern int ix86_force_align_arg_pointer;
1900 static const char ix86_force_align_arg_pointer_string[]
1901 = "force_align_arg_pointer";
1903 static rtx (*ix86_gen_leave) (void);
1904 static rtx (*ix86_gen_pop1) (rtx);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
   command line.  */
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class:
   gcc just uses SFmode or DFmode moves instead of DImode ones to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc.  */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
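/* The five entries presumably correspond to the x87 constant-load
   instructions other than fldz/fld1, i.e. fldl2t, fldl2e, fldpi,
   fldlg2 and fldln2.  */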
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1991 enum ix86_function_specific_strings
1993 IX86_FUNCTION_SPECIFIC_ARCH,
1994 IX86_FUNCTION_SPECIFIC_TUNE,
1995 IX86_FUNCTION_SPECIFIC_FPMATH,
1996 IX86_FUNCTION_SPECIFIC_MAX
1999 static char *ix86_target_string (int, int, const char *, const char *,
2000 const char *, bool);
2001 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2002 static void ix86_function_specific_save (struct cl_target_option *);
2003 static void ix86_function_specific_restore (struct cl_target_option *);
2004 static void ix86_function_specific_print (FILE *, int,
2005 struct cl_target_option *);
2006 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2007 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2008 static bool ix86_can_inline_p (tree, tree);
2009 static void ix86_set_current_function (tree);
2010 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2012 static enum calling_abi ix86_function_abi (const_tree);
2015 #ifndef SUBTARGET32_DEFAULT_CPU
2016 #define SUBTARGET32_DEFAULT_CPU "i386"
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
2021 #ifndef DEFAULT_PCC_STRUCT_RETURN
2022 #define DEFAULT_PCC_STRUCT_RETURN 1
2025 /* Whether -mtune= or -march= were specified */
2026 static int ix86_tune_defaulted;
2027 static int ix86_arch_specified;
2029 /* Bit flags that specify the ISA we are compiling for. */
2030 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
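/* Worked example of the implication chain (pure macro expansion of the
   definitions above):

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
	  | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
	  | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   so a single -msse4.2 turns on every ISA it depends on.  */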
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
   | OPTION_MASK_ISA_SSE4A_UNSET)
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET)
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
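/* The UNSET direction is transitive the other way (again pure macro
   expansion): OPTION_MASK_ISA_SSE3_UNSET expands to

     SSE3 | SSSE3 | SSE4_1 | SSE4_2 | AVX | FMA | FMA4 | XOP | F16C
	  | SSE4A

   so -mno-sse3 also disables every ISA that implies SSE3.  */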
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number.  */
struct ptt
{
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
const int align_func;
};
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Implement TARGET_HANDLE_OPTION. */
static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2220 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2233 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2238 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2249 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2250 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2254 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2255 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2262 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2267 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2268 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2275 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2280 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2281 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2288 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2293 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2294 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2301 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2306 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2307 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2314 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2319 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2320 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2327 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2332 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2333 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2340 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2345 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2346 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2351 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2352 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2356 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2357 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2363 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2364 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2368 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2369 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2376 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2381 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2382 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2389 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2394 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2395 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2402 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2407 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2408 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2415 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2420 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2421 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2428 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2433 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2434 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2441 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2446 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2447 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2454 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2459 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2460 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2467 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2472 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2473 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2480 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2485 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2486 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2493 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2498 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2499 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2506 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2511 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2512 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2519 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2524 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2525 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2532 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2537 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2538 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2545 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2550 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2551 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
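/* Every ISA case above follows the same shape, sketched here for a
   hypothetical OPT_mfoo (illustrative; not a real case label):

     case OPT_mfoo:
       if (value)
	 {
	   ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
	 }
       else
	 {
	   ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
	 }
       return true;

   The _explicit mask records the user's explicit choice so the -march
   defaults applied later in override_options do not override it.  */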
2560 /* Return a string that documents the current -m options. The caller is
2561 responsible for freeing the string. */
2564 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2565 const char *fpmath, bool add_nl_p)
2567 struct ix86_target_opts
2569 const char *option; /* option string */
2570 int mask; /* isa mask options */
/* This table is ordered so that options like -msse4.2, which imply the
   preceding options, are matched first.  */
2575 static struct ix86_target_opts isa_opts[] =
2577 { "-m64", OPTION_MASK_ISA_64BIT },
2578 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2579 { "-mfma", OPTION_MASK_ISA_FMA },
2580 { "-mxop", OPTION_MASK_ISA_XOP },
2581 { "-mlwp", OPTION_MASK_ISA_LWP },
2582 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2583 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2584 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2585 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2586 { "-msse3", OPTION_MASK_ISA_SSE3 },
2587 { "-msse2", OPTION_MASK_ISA_SSE2 },
2588 { "-msse", OPTION_MASK_ISA_SSE },
2589 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2590 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2591 { "-mmmx", OPTION_MASK_ISA_MMX },
2592 { "-mabm", OPTION_MASK_ISA_ABM },
2593 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2594 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2595 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2596 { "-maes", OPTION_MASK_ISA_AES },
2597 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2598 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2599 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2600 { "-mf16c", OPTION_MASK_ISA_F16C },
2604 static struct ix86_target_opts flag_opts[] =
2606 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2607 { "-m80387", MASK_80387 },
2608 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2609 { "-malign-double", MASK_ALIGN_DOUBLE },
2610 { "-mcld", MASK_CLD },
2611 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2612 { "-mieee-fp", MASK_IEEE_FP },
2613 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2614 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2615 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2616 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2617 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2618 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2619 { "-mno-red-zone", MASK_NO_RED_ZONE },
2620 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2621 { "-mrecip", MASK_RECIP },
2622 { "-mrtd", MASK_RTD },
2623 { "-msseregparm", MASK_SSEREGPARM },
2624 { "-mstack-arg-probe", MASK_STACK_PROBE },
2625 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2628 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2631 char target_other[40];
2640 memset (opts, '\0', sizeof (opts));
2642 /* Add -march= option. */
2645 opts[num][0] = "-march=";
2646 opts[num++][1] = arch;
2649 /* Add -mtune= option. */
2652 opts[num][0] = "-mtune=";
2653 opts[num++][1] = tune;
/* Pick out the options present in isa_opts.  */
2657 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2659 if ((isa & isa_opts[i].mask) != 0)
2661 opts[num++][0] = isa_opts[i].option;
2662 isa &= ~ isa_opts[i].mask;
2666 if (isa && add_nl_p)
2668 opts[num++][0] = isa_other;
2669 sprintf (isa_other, "(other isa: %#x)", isa);
2672 /* Add flag options. */
2673 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2675 if ((flags & flag_opts[i].mask) != 0)
2677 opts[num++][0] = flag_opts[i].option;
2678 flags &= ~ flag_opts[i].mask;
2682 if (flags && add_nl_p)
2684 opts[num++][0] = target_other;
2685 sprintf (target_other, "(other flags: %#x)", flags);
/* Add -mfpmath= option.  */
2691 opts[num][0] = "-mfpmath=";
2692 opts[num++][1] = fpmath;
2699 gcc_assert (num < ARRAY_SIZE (opts));
2701 /* Size the string. */
2703 sep_len = (add_nl_p) ? 3 : 1;
2704 for (i = 0; i < num; i++)
2707 for (j = 0; j < 2; j++)
2709 len += strlen (opts[i][j]);
2712 /* Build the string. */
2713 ret = ptr = (char *) xmalloc (len);
2716 for (i = 0; i < num; i++)
2720 for (j = 0; j < 2; j++)
2721 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2728 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2736 for (j = 0; j < 2; j++)
2739 memcpy (ptr, opts[i][j], len2[j]);
2741 line_len += len2[j];
2746 gcc_assert (ret + len >= ptr);
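/* Illustrative call (hypothetical argument values): with
   isa == (OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_SSE2), flags == 0,
   arch == "k8", tune == "generic", fpmath == "sse" and add_nl_p ==
   false, the result is roughly
     "-march=k8 -mtune=generic -m64 -msse2 -mfpmath=sse"
   and the caller must free () it.  */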
/* Return TRUE if software prefetching is beneficial for the
   target machine.  */
static bool
software_prefetching_beneficial_p (void)
2759 case PROCESSOR_GEODE:
2761 case PROCESSOR_ATHLON:
2763 case PROCESSOR_AMDFAM10:
/* Function that is callable from the debugger to print the current
   options.  */
void
ix86_debug_options (void)
2776 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2777 ix86_arch_string, ix86_tune_string,
2778 ix86_fpmath_string, true);
2782 fprintf (stderr, "%s\n\n", opts);
2786 fputs ("<no options>\n\n", stderr);
2791 /* Sometimes certain combinations of command options do not make
2792 sense on a particular target machine. You can define a macro
2793 `OVERRIDE_OPTIONS' to take account of this. This macro, if
defined, is executed once just after all the command options have
   been parsed.
2797 Don't use this macro to turn on various extra optimizations for
2798 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
static void
override_options (bool main_args_p)
2804 unsigned int ix86_arch_mask, ix86_tune_mask;
2805 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2810 /* Comes from final.c -- no real reason to change it. */
2811 #define MAX_CODE_ALIGN 16
2819 PTA_PREFETCH_SSE = 1 << 4,
2821 PTA_3DNOW_A = 1 << 6,
2825 PTA_POPCNT = 1 << 10,
2827 PTA_SSE4A = 1 << 12,
2828 PTA_NO_SAHF = 1 << 13,
2829 PTA_SSE4_1 = 1 << 14,
2830 PTA_SSE4_2 = 1 << 15,
2832 PTA_PCLMUL = 1 << 17,
2835 PTA_MOVBE = 1 << 20,
2839 PTA_FSGSBASE = 1 << 24,
2840 PTA_RDRND = 1 << 25,
2846 const char *const name; /* processor name or nickname. */
2847 const enum processor_type processor;
2848 const enum attr_cpu schedule;
2849 const unsigned /*enum pta_flags*/ flags;
2851 const processor_alias_table[] =
2853 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2854 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2855 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2856 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2857 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2858 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2859 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2860 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2861 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2862 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2863 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2864 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2865 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2867 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2869 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2870 PTA_MMX | PTA_SSE | PTA_SSE2},
2871 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
PTA_MMX | PTA_SSE | PTA_SSE2},
2873 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2874 PTA_MMX | PTA_SSE | PTA_SSE2},
2875 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2876 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2877 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2878 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2879 | PTA_CX16 | PTA_NO_SAHF},
2880 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2881 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2882 | PTA_SSSE3 | PTA_CX16},
2883 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2884 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2885 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2886 {"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2888 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2889 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2890 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2891 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2892 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2893 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2894 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2895 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2896 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2897 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2898 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2899 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2900 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2901 {"x86-64", PROCESSOR_K8, CPU_K8,
2902 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2903 {"k8", PROCESSOR_K8, CPU_K8,
2904 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2905 | PTA_SSE2 | PTA_NO_SAHF},
2906 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2907 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2908 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2909 {"opteron", PROCESSOR_K8, CPU_K8,
2910 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2911 | PTA_SSE2 | PTA_NO_SAHF},
2912 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2914 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2915 {"athlon64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2927 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2930 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2933 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2934 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2935 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2936 0 /* flags are only used for -march switch. */ },
2937 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2938 PTA_64BIT /* flags are only used for -march switch. */ },
2941 int const pta_size = ARRAY_SIZE (processor_alias_table);
2943 /* Set up prefix/suffix so the error messages refer to either the command
2944 line argument, or the attribute(target). */
2953 prefix = "option(\"";
2958 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2959 SUBTARGET_OVERRIDE_OPTIONS;
2962 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2963 SUBSUBTARGET_OVERRIDE_OPTIONS;
2966 /* -fPIC is the default for x86_64. */
2967 if (TARGET_MACHO && TARGET_64BIT)
2970 /* Set the default values for switches whose default depends on TARGET_64BIT
2971 in case they weren't overwritten by command line options. */
2976 /* Mach-O doesn't support omitting the frame pointer for now. */
2977 if (flag_omit_frame_pointer == 2)
2978 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2979 if (flag_asynchronous_unwind_tables == 2)
2980 flag_asynchronous_unwind_tables = 1;
2981 if (flag_pcc_struct_return == 2)
2982 flag_pcc_struct_return = 0;
2988 if (flag_omit_frame_pointer == 2)
2989 flag_omit_frame_pointer = 0;
2990 if (flag_asynchronous_unwind_tables == 2)
2991 flag_asynchronous_unwind_tables = 0;
2992 if (flag_pcc_struct_return == 2)
2993 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2996 /* Need to check -mtune=generic first. */
2997 if (ix86_tune_string)
2999 if (!strcmp (ix86_tune_string, "generic")
3000 || !strcmp (ix86_tune_string, "i686")
3001 /* As special support for cross compilers we read -mtune=native
as -mtune=generic.  With native compilers we won't see
3003 -mtune=native, as it was changed by the driver. */
3004 || !strcmp (ix86_tune_string, "native"))
3007 ix86_tune_string = "generic64";
3009 ix86_tune_string = "generic32";
3011 /* If this call is for setting the option attribute, allow the
3012 generic32/generic64 that was previously set. */
3013 else if (!main_args_p
3014 && (!strcmp (ix86_tune_string, "generic32")
3015 || !strcmp (ix86_tune_string, "generic64")))
3017 else if (!strncmp (ix86_tune_string, "generic", 7))
3018 error ("bad value (%s) for %stune=%s %s",
3019 ix86_tune_string, prefix, suffix, sw);
3020 else if (!strcmp (ix86_tune_string, "x86-64"))
3021 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3022 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3023 prefix, suffix, prefix, suffix, prefix, suffix);
3027 if (ix86_arch_string)
3028 ix86_tune_string = ix86_arch_string;
3029 if (!ix86_tune_string)
3031 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3032 ix86_tune_defaulted = 1;
3035 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3036 need to use a sensible tune option. */
3037 if (!strcmp (ix86_tune_string, "generic")
3038 || !strcmp (ix86_tune_string, "x86-64")
3039 || !strcmp (ix86_tune_string, "i686"))
3042 ix86_tune_string = "generic64";
3044 ix86_tune_string = "generic32";
3048 if (ix86_stringop_string)
3050 if (!strcmp (ix86_stringop_string, "rep_byte"))
3051 stringop_alg = rep_prefix_1_byte;
3052 else if (!strcmp (ix86_stringop_string, "libcall"))
3053 stringop_alg = libcall;
3054 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3055 stringop_alg = rep_prefix_4_byte;
else if (!strcmp (ix86_stringop_string, "rep_8byte")
	       && TARGET_64BIT)
3058 /* rep; movq isn't available in 32-bit code. */
3059 stringop_alg = rep_prefix_8_byte;
3060 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3061 stringop_alg = loop_1_byte;
3062 else if (!strcmp (ix86_stringop_string, "loop"))
3063 stringop_alg = loop;
3064 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3065 stringop_alg = unrolled_loop;
3067 error ("bad value (%s) for %sstringop-strategy=%s %s",
3068 ix86_stringop_string, prefix, suffix, sw);
3071 if (!ix86_arch_string)
3072 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3074 ix86_arch_specified = 1;
3076 /* Validate -mabi= value. */
3077 if (ix86_abi_string)
3079 if (strcmp (ix86_abi_string, "sysv") == 0)
3080 ix86_abi = SYSV_ABI;
3081 else if (strcmp (ix86_abi_string, "ms") == 0)
3084 error ("unknown ABI (%s) for %sabi=%s %s",
3085 ix86_abi_string, prefix, suffix, sw);
3088 ix86_abi = DEFAULT_ABI;
3090 if (ix86_cmodel_string != 0)
3092 if (!strcmp (ix86_cmodel_string, "small"))
3093 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3094 else if (!strcmp (ix86_cmodel_string, "medium"))
3095 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3096 else if (!strcmp (ix86_cmodel_string, "large"))
3097 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3099 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3100 else if (!strcmp (ix86_cmodel_string, "32"))
3101 ix86_cmodel = CM_32;
3102 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3103 ix86_cmodel = CM_KERNEL;
3105 error ("bad value (%s) for %scmodel=%s %s",
3106 ix86_cmodel_string, prefix, suffix, sw);
3110 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3111 use of rip-relative addressing. This eliminates fixups that
3112 would otherwise be needed if this object is to be placed in a
3113 DLL, and is essentially just as efficient as direct addressing. */
3114 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3115 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3116 else if (TARGET_64BIT)
3117 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3119 ix86_cmodel = CM_32;
3121 if (ix86_asm_string != 0)
if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
3125 ix86_asm_dialect = ASM_INTEL;
3126 else if (!strcmp (ix86_asm_string, "att"))
3127 ix86_asm_dialect = ASM_ATT;
3129 error ("bad value (%s) for %sasm=%s %s",
3130 ix86_asm_string, prefix, suffix, sw);
3132 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3133 error ("code model %qs not supported in the %s bit mode",
3134 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3135 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3136 sorry ("%i-bit mode not compiled in",
3137 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3139 for (i = 0; i < pta_size; i++)
3140 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3142 ix86_schedule = processor_alias_table[i].schedule;
3143 ix86_arch = processor_alias_table[i].processor;
3144 /* Default cpu tuning to the architecture. */
3145 ix86_tune = ix86_arch;
3147 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3148 error ("CPU you selected does not support x86-64 "
3151 if (processor_alias_table[i].flags & PTA_MMX
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3153 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3154 if (processor_alias_table[i].flags & PTA_3DNOW
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3156 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3157 if (processor_alias_table[i].flags & PTA_3DNOW_A
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3159 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3160 if (processor_alias_table[i].flags & PTA_SSE
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3163 if (processor_alias_table[i].flags & PTA_SSE2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3166 if (processor_alias_table[i].flags & PTA_SSE3
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3168 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3169 if (processor_alias_table[i].flags & PTA_SSSE3
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3172 if (processor_alias_table[i].flags & PTA_SSE4_1
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3175 if (processor_alias_table[i].flags & PTA_SSE4_2
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3177 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3178 if (processor_alias_table[i].flags & PTA_AVX
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3180 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3181 if (processor_alias_table[i].flags & PTA_FMA
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3183 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3184 if (processor_alias_table[i].flags & PTA_SSE4A
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3186 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3187 if (processor_alias_table[i].flags & PTA_FMA4
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3189 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3190 if (processor_alias_table[i].flags & PTA_XOP
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3192 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3193 if (processor_alias_table[i].flags & PTA_LWP
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3195 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3196 if (processor_alias_table[i].flags & PTA_ABM
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3198 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3199 if (processor_alias_table[i].flags & PTA_CX16
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3201 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3202 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3204 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3205 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3207 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3208 if (processor_alias_table[i].flags & PTA_MOVBE
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3210 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3211 if (processor_alias_table[i].flags & PTA_AES
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3213 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3214 if (processor_alias_table[i].flags & PTA_PCLMUL
3215 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3216 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3217 if (processor_alias_table[i].flags & PTA_FSGSBASE
3218 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3219 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3220 if (processor_alias_table[i].flags & PTA_RDRND
3221 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3222 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3223 if (processor_alias_table[i].flags & PTA_F16C
3224 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3225 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3226 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3227 x86_prefetch_sse = true;
3232 if (!strcmp (ix86_arch_string, "generic"))
3233 error ("generic CPU can be used only for %stune=%s %s",
3234 prefix, suffix, sw);
3235 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3236 error ("bad value (%s) for %sarch=%s %s",
3237 ix86_arch_string, prefix, suffix, sw);
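/* Worked example of the loop above, derived from the alias table:
     -march=core2 selects PROCESSOR_CORE2/CPU_CORE2 and, unless the user
     explicitly said otherwise, enables MMX, SSE, SSE2, SSE3, SSSE3 and
     CX16 from its PTA flags, plus SAHF (core2 does not set PTA_NO_SAHF)
     and x86_prefetch_sse.  */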
3239 ix86_arch_mask = 1u << ix86_arch;
3240 for (i = 0; i < X86_ARCH_LAST; ++i)
3241 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3243 for (i = 0; i < pta_size; i++)
3244 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3246 ix86_schedule = processor_alias_table[i].schedule;
3247 ix86_tune = processor_alias_table[i].processor;
3248 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3250 if (ix86_tune_defaulted)
3252 ix86_tune_string = "x86-64";
3253 for (i = 0; i < pta_size; i++)
3254 if (! strcmp (ix86_tune_string,
3255 processor_alias_table[i].name))
3257 ix86_schedule = processor_alias_table[i].schedule;
3258 ix86_tune = processor_alias_table[i].processor;
3261 error ("CPU you selected does not support x86-64 "
3264 /* Intel CPUs have always interpreted SSE prefetch instructions as
3265 NOPs; so, we can enable SSE prefetch instructions even when
3266 -mtune (rather than -march) points us to a processor that has them.
3267 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3268 higher processors. */
if (TARGET_CMOVE
	  && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3271 x86_prefetch_sse = true;
3275 if (ix86_tune_specified && i == pta_size)
3276 error ("bad value (%s) for %stune=%s %s",
3277 ix86_tune_string, prefix, suffix, sw);
3279 ix86_tune_mask = 1u << ix86_tune;
3280 for (i = 0; i < X86_TUNE_LAST; ++i)
3281 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
3288 /* Arrange to set up i386_stack_locals for all functions. */
3289 init_machine_status = ix86_init_machine_status;
3291 /* Validate -mregparm= value. */
3292 if (ix86_regparm_string)
3295 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3296 i = atoi (ix86_regparm_string);
3297 if (i < 0 || i > REGPARM_MAX)
3298 error ("%sregparm=%d%s is not between 0 and %d",
3299 prefix, i, suffix, REGPARM_MAX);
3304 ix86_regparm = REGPARM_MAX;
3306 /* If the user has provided any of the -malign-* options,
3307 warn and use that value only if -falign-* is not set.
3308 Remove this code in GCC 3.2 or later. */
3309 if (ix86_align_loops_string)
3311 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3312 prefix, suffix, suffix);
3313 if (align_loops == 0)
3315 i = atoi (ix86_align_loops_string);
3316 if (i < 0 || i > MAX_CODE_ALIGN)
3317 error ("%salign-loops=%d%s is not between 0 and %d",
3318 prefix, i, suffix, MAX_CODE_ALIGN);
3320 align_loops = 1 << i;
3324 if (ix86_align_jumps_string)
3326 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3327 prefix, suffix, suffix);
3328 if (align_jumps == 0)
3330 i = atoi (ix86_align_jumps_string);
3331 if (i < 0 || i > MAX_CODE_ALIGN)
3332 error ("%salign-loops=%d%s is not between 0 and %d",
3333 prefix, i, suffix, MAX_CODE_ALIGN);
3335 align_jumps = 1 << i;
3339 if (ix86_align_funcs_string)
3341 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3342 prefix, suffix, suffix);
3343 if (align_functions == 0)
3345 i = atoi (ix86_align_funcs_string);
3346 if (i < 0 || i > MAX_CODE_ALIGN)
3347 error ("%salign-loops=%d%s is not between 0 and %d",
3348 prefix, i, suffix, MAX_CODE_ALIGN);
3350 align_functions = 1 << i;
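/* Worked example of the (obsolete) -malign-* handling above:
     -malign-loops=4 gives align_loops = 1 << 4 = 16, i.e. the same
     16-byte loop alignment that -falign-loops=16 would request.  */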
3354 /* Default align_* from the processor table. */
3355 if (align_loops == 0)
3357 align_loops = processor_target_table[ix86_tune].align_loop;
3358 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3360 if (align_jumps == 0)
3362 align_jumps = processor_target_table[ix86_tune].align_jump;
3363 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3365 if (align_functions == 0)
3367 align_functions = processor_target_table[ix86_tune].align_func;
3370 /* Validate -mbranch-cost= value, or provide default. */
3371 ix86_branch_cost = ix86_cost->branch_cost;
3372 if (ix86_branch_cost_string)
3374 i = atoi (ix86_branch_cost_string);
3376 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3378 ix86_branch_cost = i;
3380 if (ix86_section_threshold_string)
3382 i = atoi (ix86_section_threshold_string);
3384 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3386 ix86_section_threshold = i;
3389 if (ix86_tls_dialect_string)
3391 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3392 ix86_tls_dialect = TLS_DIALECT_GNU;
3393 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3394 ix86_tls_dialect = TLS_DIALECT_GNU2;
3396 error ("bad value (%s) for %stls-dialect=%s %s",
3397 ix86_tls_dialect_string, prefix, suffix, sw);
3400 if (ix87_precision_string)
3402 i = atoi (ix87_precision_string);
3403 if (i != 32 && i != 64 && i != 80)
3404 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3409 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3411 /* Enable by default the SSE and MMX builtins. Do allow the user to
3412 explicitly disable any of these. In particular, disabling SSE and
3413 MMX for kernel code is extremely useful. */
3414 if (!ix86_arch_specified)
ix86_isa_flags
	|= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3417 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3420 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3424 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3426 if (!ix86_arch_specified)
ix86_isa_flags
	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
/* The i386 ABI does not specify a red zone.  It still makes sense to
     use one when the programmer takes care to keep the stack from being
     destroyed.  */
3432 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3433 target_flags |= MASK_NO_RED_ZONE;
3436 /* Keep nonleaf frame pointers. */
3437 if (flag_omit_frame_pointer)
3438 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3439 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3440 flag_omit_frame_pointer = 1;
3442 /* If we're doing fast math, we don't care about comparison order
3443 wrt NaNs. This lets us use a shorter comparison sequence. */
3444 if (flag_finite_math_only)
3445 target_flags &= ~MASK_IEEE_FP;
3447 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3448 since the insns won't need emulation. */
3449 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3450 target_flags &= ~MASK_NO_FANCY_MATH_387;
3452 /* Likewise, if the target doesn't have a 387, or we've specified
3453 software floating point, don't use 387 inline intrinsics. */
3454 else if (!TARGET_80387)
3455 target_flags |= MASK_NO_FANCY_MATH_387;
3457 /* Turn on MMX builtins for -msse. */
3458 if (TARGET_SSE)
3460 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3461 x86_prefetch_sse = true;
3464 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3465 if (TARGET_SSE4_2 || TARGET_ABM)
3466 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3468 /* Validate -mpreferred-stack-boundary= value or default it to
3469 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3470 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3471 if (ix86_preferred_stack_boundary_string)
3473 i = atoi (ix86_preferred_stack_boundary_string);
3474 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3475 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3476 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3477 else
3478 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
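/* Example (illustrative, not part of the original source):
   -mpreferred-stack-boundary also takes an exponent, converted here to
   bits, e.g.

     gcc -mpreferred-stack-boundary=4
       // ix86_preferred_stack_boundary = (1 << 4) * 8 = 128 bits = 16 bytes

   which matches the 16-byte stack alignment the 64-bit psABI requires
   (hence the minimum of 4 for TARGET_64BIT above).  */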
3481 /* Set the default value for -mstackrealign. */
3482 if (ix86_force_align_arg_pointer == -1)
3483 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3485 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3487 /* Validate -mincoming-stack-boundary= value or default it to
3488 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3489 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3490 if (ix86_incoming_stack_boundary_string)
3492 i = atoi (ix86_incoming_stack_boundary_string);
3493 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3494 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3495 i, TARGET_64BIT ? 4 : 2);
3496 else
3498 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3499 ix86_incoming_stack_boundary
3500 = ix86_user_incoming_stack_boundary;
3504 /* Accept -msseregparm only if at least SSE support is enabled. */
3505 if (TARGET_SSEREGPARM
3506 && ! TARGET_SSE)
3507 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3509 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3510 if (ix86_fpmath_string != 0)
3512 if (! strcmp (ix86_fpmath_string, "387"))
3513 ix86_fpmath = FPMATH_387;
3514 else if (! strcmp (ix86_fpmath_string, "sse"))
3516 if (!TARGET_SSE)
3518 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3519 ix86_fpmath = FPMATH_387;
3521 else
3522 ix86_fpmath = FPMATH_SSE;
3524 else if (! strcmp (ix86_fpmath_string, "387,sse")
3525 || ! strcmp (ix86_fpmath_string, "387+sse")
3526 || ! strcmp (ix86_fpmath_string, "sse,387")
3527 || ! strcmp (ix86_fpmath_string, "sse+387")
3528 || ! strcmp (ix86_fpmath_string, "both"))
3530 if (!TARGET_SSE)
3532 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3533 ix86_fpmath = FPMATH_387;
3535 else if (!TARGET_80387)
3537 warning (0, "387 instruction set disabled, using SSE arithmetic");
3538 ix86_fpmath = FPMATH_SSE;
3540 else
3541 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3543 else
3544 error ("bad value (%s) for %sfpmath=%s %s",
3545 ix86_fpmath_string, prefix, suffix, sw);
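/* Example (illustrative, not part of the original source): on a 32-bit
   target,

     gcc -m32 -msse2 -mfpmath=sse      // ix86_fpmath = FPMATH_SSE
     gcc -m32 -mno-sse -mfpmath=sse    // warns, falls back to FPMATH_387
     gcc -m32 -msse2 -mfpmath=both     // FPMATH_SSE | FPMATH_387  */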
3548 /* If the i387 is disabled, then do not return values in it. */
3549 if (!TARGET_80387)
3550 target_flags &= ~MASK_FLOAT_RETURNS;
3552 /* Use external vectorized library in vectorizing intrinsics. */
3553 if (ix86_veclibabi_string)
3555 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3556 ix86_veclib_handler = ix86_veclibabi_svml;
3557 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3558 ix86_veclib_handler = ix86_veclibabi_acml;
3559 else
3560 error ("unknown vectorization library ABI type (%s) for "
3561 "%sveclibabi=%s %s", ix86_veclibabi_string,
3562 prefix, suffix, sw);
3565 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3566 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3568 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3570 /* ??? Unwind info is not correct around the CFG unless either a frame
3571 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3572 unwind info generation to be aware of the CFG and propagating states
3573 around edges. */
3574 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3575 || flag_exceptions || flag_non_call_exceptions)
3576 && flag_omit_frame_pointer
3577 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3579 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3580 warning (0, "unwind tables currently require either a frame pointer "
3581 "or %saccumulate-outgoing-args%s for correctness",
3583 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3586 /* If stack probes are required, the space used for large function
3587 arguments on the stack must also be probed, so enable
3588 -maccumulate-outgoing-args so this happens in the prologue. */
3589 if (TARGET_STACK_PROBE
3590 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3592 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3593 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3594 "for correctness", prefix, suffix);
3595 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3598 /* For sane SSE instruction set generation we need fcomi instruction.
3599 It is safe to enable all CMOVE instructions. */
3603 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3605 char *p;
3606 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3607 p = strchr (internal_label_prefix, 'X');
3608 internal_label_prefix_len = p - internal_label_prefix;
3609 *p = '\0';
3612 /* When scheduling description is not available, disable scheduler pass
3613 so it won't slow down the compilation and make x87 code slower. */
3614 if (!TARGET_SCHEDULE)
3615 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3617 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3618 set_param_value ("simultaneous-prefetches",
3619 ix86_cost->simultaneous_prefetches);
3620 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3621 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3622 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3623 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3624 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3625 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
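/* Example (illustrative, not part of the original source): the
   PARAM_SET_P guards above mean an explicit command-line value always
   wins over the per-CPU cost-table default, e.g.

     gcc --param l2-cache-size=512 ...

   keeps 512 even when the selected -mtune's cost table specifies a
   different l2_cache_size.  */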
3627 /* Enable sw prefetching at -O3 for CPUs where prefetching is helpful. */
3628 if (flag_prefetch_loop_arrays < 0
3629 && HAVE_prefetch
3630 && optimize >= 3
3631 && software_prefetching_beneficial_p ())
3632 flag_prefetch_loop_arrays = 1;
3634 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3635 can be optimized to ap = __builtin_next_arg (0). */
3636 if (!TARGET_64BIT)
3637 targetm.expand_builtin_va_start = NULL;
3639 if (TARGET_64BIT)
3641 ix86_gen_leave = gen_leave_rex64;
3642 ix86_gen_pop1 = gen_popdi1;
3643 ix86_gen_add3 = gen_adddi3;
3644 ix86_gen_sub3 = gen_subdi3;
3645 ix86_gen_sub3_carry = gen_subdi3_carry;
3646 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3647 ix86_gen_monitor = gen_sse3_monitor64;
3648 ix86_gen_andsp = gen_anddi3;
3649 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3650 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3651 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3653 else
3655 ix86_gen_leave = gen_leave;
3656 ix86_gen_pop1 = gen_popsi1;
3657 ix86_gen_add3 = gen_addsi3;
3658 ix86_gen_sub3 = gen_subsi3;
3659 ix86_gen_sub3_carry = gen_subsi3_carry;
3660 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3661 ix86_gen_monitor = gen_sse3_monitor;
3662 ix86_gen_andsp = gen_andsi3;
3663 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3664 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3665 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
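/* Example (illustrative, not part of the original source): these
   function pointers let word-size-independent code emit the right
   pattern without testing TARGET_64BIT at each use, e.g. prologue code
   can write

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx, off));

   (off being an rtx constant of the appropriate mode) and get adddi3 on
   64-bit targets and addsi3 on 32-bit ones.  */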
3669 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3670 if (!TARGET_64BIT)
3671 target_flags |= MASK_CLD & ~target_flags_explicit;
3674 /* Save the initial options in case the user does function-specific options. */
3676 target_option_default_node = target_option_current_node
3677 = build_target_option_node ();
3680 /* Update register usage after having seen the compiler flags. */
3683 ix86_conditional_register_usage (void)
3688 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3690 if (fixed_regs[i] > 1)
3691 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3692 if (call_used_regs[i] > 1)
3693 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3696 /* The PIC register, if it exists, is fixed. */
3697 j = PIC_OFFSET_TABLE_REGNUM;
3698 if (j != INVALID_REGNUM)
3699 fixed_regs[j] = call_used_regs[j] = 1;
3701 /* The MS_ABI changes the set of call-used registers. */
3702 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3704 call_used_regs[SI_REG] = 0;
3705 call_used_regs[DI_REG] = 0;
3706 call_used_regs[XMM6_REG] = 0;
3707 call_used_regs[XMM7_REG] = 0;
3708 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3709 call_used_regs[i] = 0;
3712 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3713 other call-clobbered regs for 64-bit. */
3714 if (TARGET_64BIT)
3716 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3718 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3719 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3720 && call_used_regs[i])
3721 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3724 /* If MMX is disabled, squash the registers. */
3725 if (! TARGET_MMX)
3726 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3727 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3728 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3730 /* If SSE is disabled, squash the registers. */
3731 if (! TARGET_SSE)
3732 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3733 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3734 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3736 /* If the FPU is disabled, squash the registers. */
3737 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3738 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3739 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3740 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3742 /* If 32-bit, squash the 64-bit registers. */
3743 if (! TARGET_64BIT)
3745 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3746 reg_names[i] = "";
3747 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3748 reg_names[i] = "";
3753 /* Save the current options */
3756 ix86_function_specific_save (struct cl_target_option *ptr)
3758 ptr->arch = ix86_arch;
3759 ptr->schedule = ix86_schedule;
3760 ptr->tune = ix86_tune;
3761 ptr->fpmath = ix86_fpmath;
3762 ptr->branch_cost = ix86_branch_cost;
3763 ptr->tune_defaulted = ix86_tune_defaulted;
3764 ptr->arch_specified = ix86_arch_specified;
3765 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3766 ptr->target_flags_explicit = target_flags_explicit;
3768 /* The fields are char but the variables are not; make sure the
3769 values fit in the fields. */
3770 gcc_assert (ptr->arch == ix86_arch);
3771 gcc_assert (ptr->schedule == ix86_schedule);
3772 gcc_assert (ptr->tune == ix86_tune);
3773 gcc_assert (ptr->fpmath == ix86_fpmath);
3774 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3777 /* Restore the current options */
3780 ix86_function_specific_restore (struct cl_target_option *ptr)
3782 enum processor_type old_tune = ix86_tune;
3783 enum processor_type old_arch = ix86_arch;
3784 unsigned int ix86_arch_mask, ix86_tune_mask;
3787 ix86_arch = (enum processor_type) ptr->arch;
3788 ix86_schedule = (enum attr_cpu) ptr->schedule;
3789 ix86_tune = (enum processor_type) ptr->tune;
3790 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3791 ix86_branch_cost = ptr->branch_cost;
3792 ix86_tune_defaulted = ptr->tune_defaulted;
3793 ix86_arch_specified = ptr->arch_specified;
3794 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3795 target_flags_explicit = ptr->target_flags_explicit;
3797 /* Recreate the arch feature tests if the arch changed */
3798 if (old_arch != ix86_arch)
3800 ix86_arch_mask = 1u << ix86_arch;
3801 for (i = 0; i < X86_ARCH_LAST; ++i)
3802 ix86_arch_features[i]
3803 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3806 /* Recreate the tune optimization tests */
3807 if (old_tune != ix86_tune)
3809 ix86_tune_mask = 1u << ix86_tune;
3810 for (i = 0; i < X86_TUNE_LAST; ++i)
3811 ix86_tune_features[i]
3812 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3816 /* Print the current options */
3819 ix86_function_specific_print (FILE *file, int indent,
3820 struct cl_target_option *ptr)
3822 char *target_string
3823 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3824 NULL, NULL, NULL, false);
3826 fprintf (file, "%*sarch = %d (%s)\n",
3829 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3830 ? cpu_names[ptr->arch]
3833 fprintf (file, "%*stune = %d (%s)\n",
3836 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3837 ? cpu_names[ptr->tune]
3840 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3841 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3842 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3843 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3847 fprintf (file, "%*s%s\n", indent, "", target_string);
3848 free (target_string);
3853 /* Inner function to process the attribute((target(...))), take an argument and
3854 set the current options from the argument. If we have a list, recursively go
3855 over the list. */
3857 static bool
3858 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3863 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3864 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3865 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3866 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3881 enum ix86_opt_type type;
3886 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3887 IX86_ATTR_ISA ("abm", OPT_mabm),
3888 IX86_ATTR_ISA ("aes", OPT_maes),
3889 IX86_ATTR_ISA ("avx", OPT_mavx),
3890 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3891 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3892 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3893 IX86_ATTR_ISA ("sse", OPT_msse),
3894 IX86_ATTR_ISA ("sse2", OPT_msse2),
3895 IX86_ATTR_ISA ("sse3", OPT_msse3),
3896 IX86_ATTR_ISA ("sse4", OPT_msse4),
3897 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3898 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3899 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3900 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3901 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3902 IX86_ATTR_ISA ("xop", OPT_mxop),
3903 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3904 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3905 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3906 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3908 /* string options */
3909 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3910 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3911 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3914 IX86_ATTR_YES ("cld",
3918 IX86_ATTR_NO ("fancy-math-387",
3919 OPT_mfancy_math_387,
3920 MASK_NO_FANCY_MATH_387),
3922 IX86_ATTR_YES ("ieee-fp",
3926 IX86_ATTR_YES ("inline-all-stringops",
3927 OPT_minline_all_stringops,
3928 MASK_INLINE_ALL_STRINGOPS),
3930 IX86_ATTR_YES ("inline-stringops-dynamically",
3931 OPT_minline_stringops_dynamically,
3932 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3934 IX86_ATTR_NO ("align-stringops",
3935 OPT_mno_align_stringops,
3936 MASK_NO_ALIGN_STRINGOPS),
3938 IX86_ATTR_YES ("recip",
3944 /* If this is a list, recurse to get the options. */
3945 if (TREE_CODE (args) == TREE_LIST)
3948 bool ret = true;
3949 for (; args; args = TREE_CHAIN (args))
3950 if (TREE_VALUE (args)
3951 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3952 ret = false;
3954 return ret;
3957 else if (TREE_CODE (args) != STRING_CST)
3958 gcc_unreachable ();
3960 /* Handle multiple arguments separated by commas. */
3961 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3963 while (next_optstr && *next_optstr != '\0')
3965 char *p = next_optstr;
3966 char *orig_p = p;
3967 char *comma = strchr (next_optstr, ',');
3968 const char *opt_string;
3969 size_t len, opt_len;
3974 enum ix86_opt_type type = ix86_opt_unknown;
3980 len = comma - next_optstr;
3981 next_optstr = comma + 1;
3989 /* Recognize no-xxx. */
3990 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3992 opt_set_p = false;
3993 p += 3;
3994 len -= 3;
3996 else
3997 opt_set_p = true;
3999 /* Find the option. */
4002 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4004 type = attrs[i].type;
4005 opt_len = attrs[i].len;
4006 if (ch == attrs[i].string[0]
4007 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4008 && memcmp (p, attrs[i].string, opt_len) == 0)
4010 opt = attrs[i].opt;
4011 mask = attrs[i].mask;
4012 opt_string = attrs[i].string;
4013 break;
4017 /* Process the option. */
4020 error ("attribute(target(\"%s\")) is unknown", orig_p);
4024 else if (type == ix86_opt_isa)
4025 ix86_handle_option (opt, p, opt_set_p);
4027 else if (type == ix86_opt_yes || type == ix86_opt_no)
4029 if (type == ix86_opt_no)
4030 opt_set_p = !opt_set_p;
4032 if (opt_set_p)
4033 target_flags |= mask;
4034 else
4035 target_flags &= ~mask;
4038 else if (type == ix86_opt_str)
4042 error ("option(\"%s\") was already specified", opt_string);
4046 p_strings[opt] = xstrdup (p + opt_len);
4056 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4059 ix86_valid_target_attribute_tree (tree args)
4061 const char *orig_arch_string = ix86_arch_string;
4062 const char *orig_tune_string = ix86_tune_string;
4063 const char *orig_fpmath_string = ix86_fpmath_string;
4064 int orig_tune_defaulted = ix86_tune_defaulted;
4065 int orig_arch_specified = ix86_arch_specified;
4066 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4069 struct cl_target_option *def
4070 = TREE_TARGET_OPTION (target_option_default_node);
4072 /* Process each of the options on the chain. */
4073 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4074 return NULL_TREE;
4076 /* If the changed options are different from the default, rerun override_options,
4077 and then save the options away. The string options are attribute options,
4078 and will be undone when we copy the save structure. */
4079 if (ix86_isa_flags != def->ix86_isa_flags
4080 || target_flags != def->target_flags
4081 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4082 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4083 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4085 /* If we are using the default tune= or arch=, undo the string assigned,
4086 and use the default. */
4087 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4088 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4089 else if (!orig_arch_specified)
4090 ix86_arch_string = NULL;
4092 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4093 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4094 else if (orig_tune_defaulted)
4095 ix86_tune_string = NULL;
4097 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4098 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4099 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4100 else if (!TARGET_64BIT && TARGET_SSE)
4101 ix86_fpmath_string = "sse,387";
4103 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4104 override_options (false);
4106 /* Add any builtin functions with the new isa if any. */
4107 ix86_add_new_builtins (ix86_isa_flags);
4109 /* Save the current options unless we are validating options for
4110 #pragma. */
4111 t = build_target_option_node ();
4113 ix86_arch_string = orig_arch_string;
4114 ix86_tune_string = orig_tune_string;
4115 ix86_fpmath_string = orig_fpmath_string;
4117 /* Free up memory allocated to hold the strings */
4118 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4119 if (option_strings[i])
4120 free (option_strings[i]);
4126 /* Hook to validate attribute((target("string"))). */
4129 ix86_valid_target_attribute_p (tree fndecl,
4130 tree ARG_UNUSED (name),
4131 tree args,
4132 int ARG_UNUSED (flags))
4134 struct cl_target_option cur_target;
4136 tree old_optimize = build_optimization_node ();
4137 tree new_target, new_optimize;
4138 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4140 /* If the function changed the optimization levels as well as setting target
4141 options, start with the optimizations specified. */
4142 if (func_optimize && func_optimize != old_optimize)
4143 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4145 /* The target attributes may also change some optimization flags, so update
4146 the optimization options if necessary. */
4147 cl_target_option_save (&cur_target);
4148 new_target = ix86_valid_target_attribute_tree (args);
4149 new_optimize = build_optimization_node ();
4156 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4158 if (old_optimize != new_optimize)
4159 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4162 cl_target_option_restore (&cur_target);
4164 if (old_optimize != new_optimize)
4165 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4171 /* Hook to determine if one function can safely inline another. */
4174 ix86_can_inline_p (tree caller, tree callee)
4177 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4178 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4180 /* If callee has no option attributes, then it is ok to inline. */
4182 if (!callee_tree)
4183 ret = true;
4184 /* If caller has no option attributes, but callee does then it is not ok to
4185 inline. */
4186 else if (!caller_tree)
4187 ret = false;
4189 else
4191 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4192 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4194 /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 function
4195 can inline a SSE2 function but a SSE2 function can't inline a SSE4
4196 function. */
4197 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4198 != callee_opts->ix86_isa_flags)
4199 ret = false;
4201 /* See if we have the same non-isa options. */
4202 else if (caller_opts->target_flags != callee_opts->target_flags)
4203 ret = false;
4205 /* See if arch, tune, etc. are the same. */
4206 else if (caller_opts->arch != callee_opts->arch)
4207 ret = false;
4209 else if (caller_opts->tune != callee_opts->tune)
4210 ret = false;
4212 else if (caller_opts->fpmath != callee_opts->fpmath)
4213 ret = false;
4215 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4216 ret = false;
4218 else
4219 ret = true;
4221 return ret;
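/* Example (illustrative, not part of the original source): under the
   subset rule above,

     __attribute__((target("sse4.2"))) void caller (void);
     __attribute__((target("sse2")))  void callee (void);

   caller may inline callee (the SSE2 isa flags are a subset of the
   SSE4.2 ones), but callee may not inline caller.  */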
4226 /* Remember the last target of ix86_set_current_function. */
4227 static GTY(()) tree ix86_previous_fndecl;
4229 /* Establish appropriate back-end context for processing the function
4230 FNDECL. The argument might be NULL to indicate processing at top
4231 level, outside of any function scope. */
4233 ix86_set_current_function (tree fndecl)
4235 /* Only change the context if the function changes. This hook is called
4236 several times in the course of compiling a function, and we don't want to
4237 slow things down too much or call target_reinit when it isn't safe. */
4238 if (fndecl && fndecl != ix86_previous_fndecl)
4240 tree old_tree = (ix86_previous_fndecl
4241 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4244 tree new_tree = (fndecl
4245 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4248 ix86_previous_fndecl = fndecl;
4249 if (old_tree == new_tree)
4254 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4260 struct cl_target_option *def
4261 = TREE_TARGET_OPTION (target_option_current_node);
4263 cl_target_option_restore (def);
4270 /* Return true if this goes in large data/bss. */
4273 ix86_in_large_data_p (tree exp)
4275 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4278 /* Functions are never large data. */
4279 if (TREE_CODE (exp) == FUNCTION_DECL)
4282 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4284 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4285 if (strcmp (section, ".ldata") == 0
4286 || strcmp (section, ".lbss") == 0)
4292 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4294 /* If this is an incomplete type with size 0, then we can't put it
4295 in data because it might be too big when completed. */
4296 if (!size || size > ix86_section_threshold)
4303 /* Switch to the appropriate section for output of DECL.
4304 DECL is either a `VAR_DECL' node or a constant of some sort.
4305 RELOC indicates whether forming the initial value of DECL requires
4306 link-time relocations. */
4308 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4312 x86_64_elf_select_section (tree decl, int reloc,
4313 unsigned HOST_WIDE_INT align)
4315 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4316 && ix86_in_large_data_p (decl))
4318 const char *sname = NULL;
4319 unsigned int flags = SECTION_WRITE;
4320 switch (categorize_decl_for_section (decl, reloc))
4325 case SECCAT_DATA_REL:
4326 sname = ".ldata.rel";
4328 case SECCAT_DATA_REL_LOCAL:
4329 sname = ".ldata.rel.local";
4331 case SECCAT_DATA_REL_RO:
4332 sname = ".ldata.rel.ro";
4334 case SECCAT_DATA_REL_RO_LOCAL:
4335 sname = ".ldata.rel.ro.local";
4339 flags |= SECTION_BSS;
4342 case SECCAT_RODATA_MERGE_STR:
4343 case SECCAT_RODATA_MERGE_STR_INIT:
4344 case SECCAT_RODATA_MERGE_CONST:
4348 case SECCAT_SRODATA:
4355 /* We don't split these for medium model. Place them into
4356 default sections and hope for the best. */
4361 /* We might get called with string constants, but get_named_section
4362 doesn't like them as they are not DECLs. Also, we need to set
4363 flags in that case. */
4365 return get_section (sname, flags, NULL);
4366 return get_named_section (decl, sname, reloc);
4369 return default_elf_select_section (decl, reloc, align);
4372 /* Build up a unique section name, expressed as a
4373 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4374 RELOC indicates whether the initial value of EXP requires
4375 link-time relocations. */
4377 static void ATTRIBUTE_UNUSED
4378 x86_64_elf_unique_section (tree decl, int reloc)
4380 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4381 && ix86_in_large_data_p (decl))
4383 const char *prefix = NULL;
4384 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4385 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4387 switch (categorize_decl_for_section (decl, reloc))
4390 case SECCAT_DATA_REL:
4391 case SECCAT_DATA_REL_LOCAL:
4392 case SECCAT_DATA_REL_RO:
4393 case SECCAT_DATA_REL_RO_LOCAL:
4394 prefix = one_only ? ".ld" : ".ldata";
4397 prefix = one_only ? ".lb" : ".lbss";
4400 case SECCAT_RODATA_MERGE_STR:
4401 case SECCAT_RODATA_MERGE_STR_INIT:
4402 case SECCAT_RODATA_MERGE_CONST:
4403 prefix = one_only ? ".lr" : ".lrodata";
4405 case SECCAT_SRODATA:
4412 /* We don't split these for medium model. Place them into
4413 default sections and hope for the best. */
4418 const char *name, *linkonce;
4421 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4422 name = targetm.strip_name_encoding (name);
4424 /* If we're using one_only, then there needs to be a .gnu.linkonce
4425 prefix to the section name. */
4426 linkonce = one_only ? ".gnu.linkonce" : "";
4428 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4430 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4434 default_unique_section (decl, reloc);
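/* Example (illustrative, not part of the original source): with
   -mcmodel=medium, a large read-only object would get a section name
   such as

     .lrodata.big_table            // plain unique section
     .gnu.linkonce.lr.big_table    // one_only without COMDAT groups

   where "big_table" is a hypothetical decl whose size exceeds
   -mlarge-data-threshold.  */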
4437 #ifdef COMMON_ASM_OP
4438 /* This says how to output assembler code to declare an
4439 uninitialized external linkage data object.
4441 For medium model x86-64 we need to use .largecomm opcode for
4442 large objects. */
4443 void
4444 x86_elf_aligned_common (FILE *file,
4445 const char *name, unsigned HOST_WIDE_INT size,
4448 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4449 && size > (unsigned int)ix86_section_threshold)
4450 fputs (".largecomm\t", file);
4452 fputs (COMMON_ASM_OP, file);
4453 assemble_name (file, name);
4454 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4455 size, align / BITS_PER_UNIT);
4459 /* Utility function for targets to use in implementing
4460 ASM_OUTPUT_ALIGNED_BSS. */
4463 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4464 const char *name, unsigned HOST_WIDE_INT size,
4467 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4468 && size > (unsigned int)ix86_section_threshold)
4469 switch_to_section (get_named_section (decl, ".lbss", 0));
4471 switch_to_section (bss_section);
4472 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4473 #ifdef ASM_DECLARE_OBJECT_NAME
4474 last_assemble_variable_decl = decl;
4475 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4477 /* Standard thing is just output label for the object. */
4478 ASM_OUTPUT_LABEL (file, name);
4479 #endif /* ASM_DECLARE_OBJECT_NAME */
4480 ASM_OUTPUT_SKIP (file, size ? size : 1);
4484 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4486 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4487 make the problem with not enough registers even worse. */
4488 #ifdef INSN_SCHEDULING
4489 if (level > 1)
4490 flag_schedule_insns = 0;
4494 /* The Darwin libraries never set errno, so we might as well
4495 avoid calling them when that's the only reason we would. */
4496 flag_errno_math = 0;
4498 /* The default values of these switches depend on TARGET_64BIT, which is
4499 not known at this moment. Mark these values with 2 and let the user
4500 override them. In case there is no command line option specifying
4501 them, we will set the defaults in override_options. */
4502 if (optimize >= 1)
4503 flag_omit_frame_pointer = 2;
4505 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4506 if (level > 1)
4507 flag_zee = 2;
4509 flag_pcc_struct_return = 2;
4510 flag_asynchronous_unwind_tables = 2;
4511 flag_vect_cost_model = 1;
4512 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4513 SUBTARGET_OPTIMIZATION_OPTIONS;
4517 /* Decide whether we must probe the stack before any space allocation
4518 on this target. It's essentially TARGET_STACK_PROBE except when
4519 -fstack-check causes the stack to be already probed differently. */
4522 ix86_target_stack_probe (void)
4524 /* Do not probe the stack twice if static stack checking is enabled. */
4525 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4526 return false;
4528 return TARGET_STACK_PROBE;
4531 /* Decide whether we can make a sibling call to a function. DECL is the
4532 declaration of the function being targeted by the call and EXP is the
4533 CALL_EXPR representing the call. */
4536 ix86_function_ok_for_sibcall (tree decl, tree exp)
4538 tree type, decl_or_type;
4541 /* If we are generating position-independent code, we cannot sibcall
4542 optimize any indirect call, or a direct call to a global function,
4543 as the PLT requires %ebx be live. */
4544 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4545 return false;
4547 /* If we need to align the outgoing stack, then sibcalling would
4548 unalign the stack, which may break the called function. */
4549 if (ix86_minimum_incoming_stack_boundary (true)
4550 < PREFERRED_STACK_BOUNDARY)
4551 return false;
4553 if (decl)
4555 decl_or_type = decl;
4556 type = TREE_TYPE (decl);
4558 else
4560 /* We're looking at the CALL_EXPR, we need the type of the function. */
4561 type = CALL_EXPR_FN (exp); /* pointer expression */
4562 type = TREE_TYPE (type); /* pointer type */
4563 type = TREE_TYPE (type); /* function type */
4564 decl_or_type = type;
4567 /* Check that the return value locations are the same. Like
4568 if we are returning floats on the 80387 register stack, we cannot
4569 make a sibcall from a function that doesn't return a float to a
4570 function that does or, conversely, from a function that does return
4571 a float to a function that doesn't; the necessary stack adjustment
4572 would not be executed. This is also the place we notice
4573 differences in the return value ABI. Note that it is ok for one
4574 of the functions to have void return type as long as the return
4575 value of the other is passed in a register. */
4576 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4577 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4579 if (STACK_REG_P (a) || STACK_REG_P (b))
4581 if (!rtx_equal_p (a, b))
4582 return false;
4584 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4585 ;
4586 else if (!rtx_equal_p (a, b))
4587 return false;
4591 /* The SYSV ABI has more call-clobbered registers;
4592 disallow sibcalls from MS to SYSV. */
4593 if (cfun->machine->call_abi == MS_ABI
4594 && ix86_function_type_abi (type) == SYSV_ABI)
4595 return false;
4599 /* If this call is indirect, we'll need to be able to use a
4600 call-clobbered register for the address of the target function.
4601 Make sure that all such registers are not used for passing
4602 parameters. Note that DLLIMPORT functions are indirect. */
4603 if (!decl
4604 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4606 if (ix86_function_regparm (type, NULL) >= 3)
4608 /* ??? Need to count the actual number of registers to be used,
4609 not the possible number of registers. Fix later. */
4610 return false;
4615 /* Otherwise okay. That also includes certain types of indirect calls. */
4616 return true;
4619 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4620 and "sseregparm" calling convention attributes;
4621 arguments as in struct attribute_spec.handler. */
4624 ix86_handle_cconv_attribute (tree *node, tree name,
4626 int flags ATTRIBUTE_UNUSED,
4629 if (TREE_CODE (*node) != FUNCTION_TYPE
4630 && TREE_CODE (*node) != METHOD_TYPE
4631 && TREE_CODE (*node) != FIELD_DECL
4632 && TREE_CODE (*node) != TYPE_DECL)
4634 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4636 *no_add_attrs = true;
4640 /* Can combine regparm with all attributes but fastcall. */
4641 if (is_attribute_p ("regparm", name))
4645 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4647 error ("fastcall and regparm attributes are not compatible");
4650 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4652 error ("regparam and thiscall attributes are not compatible");
4655 cst = TREE_VALUE (args);
4656 if (TREE_CODE (cst) != INTEGER_CST)
4658 warning (OPT_Wattributes,
4659 "%qE attribute requires an integer constant argument",
4661 *no_add_attrs = true;
4663 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4665 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4667 *no_add_attrs = true;
4670 return NULL_TREE;
4673 if (TARGET_64BIT)
4675 /* Do not warn when emulating the MS ABI. */
4676 if ((TREE_CODE (*node) != FUNCTION_TYPE
4677 && TREE_CODE (*node) != METHOD_TYPE)
4678 || ix86_function_type_abi (*node) != MS_ABI)
4679 warning (OPT_Wattributes, "%qE attribute ignored",
4681 *no_add_attrs = true;
4685 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4686 if (is_attribute_p ("fastcall", name))
4688 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4690 error ("fastcall and cdecl attributes are not compatible");
4692 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4694 error ("fastcall and stdcall attributes are not compatible");
4696 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4698 error ("fastcall and regparm attributes are not compatible");
4700 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4702 error ("fastcall and thiscall attributes are not compatible");
4706 /* Can combine stdcall with fastcall (redundant), regparm and
4708 else if (is_attribute_p ("stdcall", name))
4710 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4712 error ("stdcall and cdecl attributes are not compatible");
4714 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4716 error ("stdcall and fastcall attributes are not compatible");
4718 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4720 error ("stdcall and thiscall attributes are not compatible");
4724 /* Can combine cdecl with regparm and sseregparm. */
4725 else if (is_attribute_p ("cdecl", name))
4727 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4729 error ("stdcall and cdecl attributes are not compatible");
4731 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4733 error ("fastcall and cdecl attributes are not compatible");
4735 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4737 error ("cdecl and thiscall attributes are not compatible");
4740 else if (is_attribute_p ("thiscall", name))
4742 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4743 warning (OPT_Wattributes, "%qE attribute is used for none class-method",
4745 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4747 error ("stdcall and thiscall attributes are not compatible");
4749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4751 error ("fastcall and thiscall attributes are not compatible");
4753 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4755 error ("cdecl and thiscall attributes are not compatible");
4759 /* Can combine sseregparm with all attributes. */
4761 return NULL_TREE;
4764 /* Return 0 if the attributes for two types are incompatible, 1 if they
4765 are compatible, and 2 if they are nearly compatible (which causes a
4766 warning to be generated). */
4769 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4771 /* Check for mismatch of non-default calling convention. */
4772 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4774 if (TREE_CODE (type1) != FUNCTION_TYPE
4775 && TREE_CODE (type1) != METHOD_TYPE)
4776 return 1;
4778 /* Check for mismatched fastcall/regparm types. */
4779 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4780 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4781 || (ix86_function_regparm (type1, NULL)
4782 != ix86_function_regparm (type2, NULL)))
4783 return 0;
4785 /* Check for mismatched sseregparm types. */
4786 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4787 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4790 /* Check for mismatched thiscall types. */
4791 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4792 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4795 /* Check for mismatched return types (cdecl vs stdcall). */
4796 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4797 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4798 return 0;
4800 return 1;
4803 /* Return the regparm value for a function with the indicated TYPE and DECL.
4804 DECL may be NULL when calling function indirectly
4805 or considering a libcall. */
4808 ix86_function_regparm (const_tree type, const_tree decl)
4814 return (ix86_function_type_abi (type) == SYSV_ABI
4815 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4817 regparm = ix86_regparm;
4818 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4819 if (attr)
4821 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4822 return regparm;
4825 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4828 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4831 /* Use register calling convention for local functions when possible. */
4832 if (decl
4833 && TREE_CODE (decl) == FUNCTION_DECL
4834 && optimize
4835 && !profile_flag)
4837 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4838 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4839 if (i && i->local)
4841 int local_regparm, globals = 0, regno;
4843 /* Make sure no regparm register is taken by a
4844 fixed register variable. */
4845 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4846 if (fixed_regs[local_regparm])
4847 break;
4849 /* We don't want to use regparm(3) for nested functions as
4850 these use a static chain pointer in the third argument. */
4851 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4852 local_regparm = 2;
4854 /* Each fixed register usage increases register pressure,
4855 so fewer registers should be used for argument passing.
4856 This functionality can be overridden by an explicit
4857 regparm value. */
4858 for (regno = 0; regno <= DI_REG; regno++)
4859 if (fixed_regs[regno])
4860 globals++;
4862 local_regparm
4863 = globals < local_regparm ? local_regparm - globals : 0;
4865 if (local_regparm > regparm)
4866 regparm = local_regparm;
4870 return regparm;
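/* Example (illustrative, not part of the original source): for 32-bit
   code,

     __attribute__((regparm(2))) int add2 (int a, int b);

   passes a in %eax and b in %edx instead of on the stack; the local-
   function path above may pick such a convention automatically when the
   callee is not visible outside the translation unit.  */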
4873 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4874 DFmode (2) arguments in SSE registers for a function with the
4875 indicated TYPE and DECL. DECL may be NULL when calling function
4876 indirectly or considering a libcall. Otherwise return 0. */
4879 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4881 gcc_assert (!TARGET_64BIT);
4883 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4884 by the sseregparm attribute. */
4885 if (TARGET_SSEREGPARM
4886 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4893 error ("Calling %qD with attribute sseregparm without "
4894 "SSE/SSE2 enabled", decl);
4896 error ("Calling %qT with attribute sseregparm without "
4897 "SSE/SSE2 enabled", type);
4905 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4906 (and DFmode for SSE2) arguments in SSE registers. */
4907 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4909 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4910 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4911 if (i && i->local)
4912 return TARGET_SSE2 ? 2 : 1;
4915 return 0;
4918 /* Return true if EAX is live at the start of the function. Used by
4919 ix86_expand_prologue to determine if we need special help before
4920 calling allocate_stack_worker. */
4923 ix86_eax_live_at_start_p (void)
4925 /* Cheat. Don't bother working forward from ix86_function_regparm
4926 to the function type to whether an actual argument is located in
4927 eax. Instead just look at cfg info, which is still close enough
4928 to correct at this point. This gives false positives for broken
4929 functions that might use uninitialized data that happens to be
4930 allocated in eax, but who cares? */
4931 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4934 /* Value is the number of bytes of arguments automatically
4935 popped when returning from a subroutine call.
4936 FUNDECL is the declaration node of the function (as a tree),
4937 FUNTYPE is the data type of the function (as a tree),
4938 or for a library call it is an identifier node for the subroutine name.
4939 SIZE is the number of bytes of arguments passed on the stack.
4941 On the 80386, the RTD insn may be used to pop them if the number
4942 of args is fixed, but if the number is variable then the caller
4943 must pop them all. RTD can't be used for library calls now
4944 because the library is compiled with the Unix compiler.
4945 Use of RTD is a selectable option, since it is incompatible with
4946 standard Unix calling sequences. If the option is not selected,
4947 the caller must always pop the args.
4949 The attribute stdcall is equivalent to RTD on a per module basis. */
4952 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4956 /* None of the 64-bit ABIs pop arguments. */
4957 if (TARGET_64BIT)
4958 return 0;
4960 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4962 /* Cdecl functions override -mrtd, and never pop the stack. */
4963 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4965 /* Stdcall and fastcall functions will pop the stack if not
4966 variable args. */
4967 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4968 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4969 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
4970 rtd = 1;
4972 if (rtd && ! stdarg_p (funtype))
4973 return size;
4976 /* Lose any fake structure return argument if it is passed on the stack. */
4977 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4978 && !KEEP_AGGREGATE_RETURN_POINTER)
4980 int nregs = ix86_function_regparm (funtype, fundecl);
4981 if (nregs == 0)
4982 return GET_MODE_SIZE (Pmode);
4985 return 0;
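/* Example (illustrative, not part of the original source): for

     __attribute__((stdcall)) int f (int a, int b);

   the 32-bit callee returns with "ret $8", so this hook reports 8 bytes
   popped; a plain cdecl function returns with "ret" and the hook reports
   0, leaving the caller to adjust the stack.  */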
4988 /* Argument support functions. */
4990 /* Return true when register may be used to pass function parameters. */
4992 ix86_function_arg_regno_p (int regno)
4995 const int *parm_regs;
5000 return (regno < REGPARM_MAX
5001 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5002 else
5003 return (regno < REGPARM_MAX
5004 || (TARGET_MMX && MMX_REGNO_P (regno)
5005 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5006 || (TARGET_SSE && SSE_REGNO_P (regno)
5007 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5012 if (SSE_REGNO_P (regno) && TARGET_SSE)
5017 if (TARGET_SSE && SSE_REGNO_P (regno)
5018 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5022 /* TODO: The function should depend on current function ABI but
5023 builtins.c would need updating then. Therefore we use the
5026 /* RAX is used as hidden argument to va_arg functions. */
5027 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5028 return true;
5030 if (ix86_abi == MS_ABI)
5031 parm_regs = x86_64_ms_abi_int_parameter_registers;
5033 parm_regs = x86_64_int_parameter_registers;
5034 for (i = 0; i < (ix86_abi == MS_ABI
5035 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5036 if (regno == parm_regs[i])
5037 return true;
5038 return false;
5041 /* Return true if we do not know how to pass TYPE solely in registers. */
5044 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5046 if (must_pass_in_stack_var_size_or_pad (mode, type))
5047 return true;
5049 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5050 The layout_type routine is crafty and tries to trick us into passing
5051 currently unsupported vector types on the stack by using TImode. */
5052 return (!TARGET_64BIT && mode == TImode
5053 && type && TREE_CODE (type) != VECTOR_TYPE);
5056 /* It returns the size, in bytes, of the area reserved for arguments passed
5057 in registers for the function represented by fndecl dependent on the used
5058 ABI. */
5059 int
5060 ix86_reg_parm_stack_space (const_tree fndecl)
5062 enum calling_abi call_abi = SYSV_ABI;
5063 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5064 call_abi = ix86_function_abi (fndecl);
5065 else
5066 call_abi = ix86_function_type_abi (fndecl);
5067 if (call_abi == MS_ABI)
5068 return 32;
5069 return 0;
5072 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5073 call abi used. */
5074 enum calling_abi
5075 ix86_function_type_abi (const_tree fntype)
5077 if (TARGET_64BIT && fntype != NULL)
5079 enum calling_abi abi = ix86_abi;
5080 if (abi == SYSV_ABI)
5082 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5085 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5093 ix86_function_ms_hook_prologue (const_tree fntype)
5095 if (fntype && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
5097 if (decl_function_context (fntype) != NULL_TREE)
5099 error_at (DECL_SOURCE_LOCATION (fntype),
5100 "ms_hook_prologue is not compatible with nested function");
5108 static enum calling_abi
5109 ix86_function_abi (const_tree fndecl)
5111 if (! fndecl)
5112 return ix86_abi;
5113 return ix86_function_type_abi (TREE_TYPE (fndecl));
5116 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5117 call abi used. */
5118 enum calling_abi
5119 ix86_cfun_abi (void)
5121 if (! cfun || ! TARGET_64BIT)
5122 return ix86_abi;
5123 return cfun->machine->call_abi;
5126 /* Write the extra assembler code needed to declare a function properly. */
5129 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5132 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5136 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5137 unsigned int filler_cc = 0xcccccccc;
5139 for (i = 0; i < filler_count; i += 4)
5140 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5143 ASM_OUTPUT_LABEL (asm_out_file, fname);
5145 /* Output magic byte marker, if hot-patch attribute is set.
5146 For x86 case frame-pointer prologue will be emitted in
5147 expand_prologue. */
5148 if (is_ms_hook)
5150 if (TARGET_64BIT)
5151 /* leaq [%rsp + 0], %rsp */
5152 asm_fprintf (asm_out_file, ASM_BYTE
5153 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5155 /* movl.s %edi, %edi. */
5156 asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
5161 extern void init_regs (void);
5163 /* Implementation of the call ABI switching target hook. The call
5164 register sets specific to FNDECL are set up here. See also
5165 CONDITIONAL_REGISTER_USAGE for more details. */
5167 ix86_call_abi_override (const_tree fndecl)
5169 if (fndecl == NULL_TREE)
5170 cfun->machine->call_abi = ix86_abi;
5172 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5175 /* MS and SYSV ABI have different sets of call-used registers. Avoid expensive
5176 re-initialization of init_regs each time we switch function context since
5177 this is needed only during RTL expansion. */
5179 ix86_maybe_switch_abi (void)
5181 if (TARGET_64BIT &&
5182 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5183 reinit_regs ();
5186 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5187 for a call to a function whose data type is FNTYPE.
5188 For a library call, FNTYPE is 0. */
5191 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5192 tree fntype, /* tree ptr for function decl */
5193 rtx libname, /* SYMBOL_REF of library name or 0 */
5194 tree fndecl)
5196 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5197 memset (cum, 0, sizeof (*cum));
5199 if (fndecl)
5200 cum->call_abi = ix86_function_abi (fndecl);
5201 else
5202 cum->call_abi = ix86_function_type_abi (fntype);
5203 /* Set up the number of registers to use for passing arguments. */
5205 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5206 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5207 "or subtarget optimization implying it");
5208 cum->nregs = ix86_regparm;
5209 if (TARGET_64BIT)
5211 cum->nregs = (cum->call_abi == SYSV_ABI
5212 ? X86_64_REGPARM_MAX
5213 : X86_64_MS_REGPARM_MAX);
5217 cum->sse_nregs = SSE_REGPARM_MAX;
5218 if (TARGET_64BIT)
5220 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5221 ? X86_64_SSE_REGPARM_MAX
5222 : X86_64_MS_SSE_REGPARM_MAX);
5226 cum->mmx_nregs = MMX_REGPARM_MAX;
5227 cum->warn_avx = true;
5228 cum->warn_sse = true;
5229 cum->warn_mmx = true;
5231 /* Because type might mismatch in between caller and callee, we need to
5232 use actual type of function for local calls.
5233 FIXME: cgraph_analyze can be told to actually record if function uses
5234 va_start so for local functions maybe_vaarg can be made more aggressive.
5236 FIXME: once the type system is fixed, we won't need this code anymore. */
5237 if (i && i->local)
5238 fntype = TREE_TYPE (fndecl);
5239 cum->maybe_vaarg = (fntype
5240 ? (!prototype_p (fntype) || stdarg_p (fntype))
5241 : !libname);
5243 if (!TARGET_64BIT)
5245 /* If there are variable arguments, then we won't pass anything
5246 in registers in 32-bit mode. */
5247 if (stdarg_p (fntype))
5249 cum->nregs = 0;
5250 cum->sse_nregs = 0;
5251 cum->mmx_nregs = 0;
5252 cum->warn_avx = 0;
5253 cum->warn_sse = 0;
5254 cum->warn_mmx = 0;
5255 return;
5258 /* Use ecx and edx registers if function has fastcall attribute,
5259 else look for regparm information. */
5262 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5264 cum->nregs = 1;
5265 cum->fastcall = 1; /* Same first register as in fastcall. */
5267 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5269 cum->nregs = 2;
5270 cum->fastcall = 1;
5272 else
5273 cum->nregs = ix86_function_regparm (fntype, fndecl);
5276 /* Set up the number of SSE registers used for passing SFmode
5277 and DFmode arguments. Warn for mismatching ABI. */
5278 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5282 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5283 But in the case of vector types, it is some vector mode.
5285 When we have only some of our vector isa extensions enabled, then there
5286 are some modes for which vector_mode_supported_p is false. For these
5287 modes, the generic vector support in gcc will choose some non-vector mode
5288 in order to implement the type. By computing the natural mode, we'll
5289 select the proper ABI location for the operand and not depend on whatever
5290 the middle-end decides to do with these vector types.
5292 The middle-end can't deal with the vector types > 16 bytes. In this
5293 case, we return the original mode and warn ABI change if CUM isn't
5294 NULL. */
5296 static enum machine_mode
5297 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5299 enum machine_mode mode = TYPE_MODE (type);
5301 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5303 HOST_WIDE_INT size = int_size_in_bytes (type);
5304 if ((size == 8 || size == 16 || size == 32)
5305 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5306 && TYPE_VECTOR_SUBPARTS (type) > 1)
5308 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5310 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5311 mode = MIN_MODE_VECTOR_FLOAT;
5313 mode = MIN_MODE_VECTOR_INT;
5315 /* Get the mode which has this inner mode and number of units. */
5316 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5317 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5318 && GET_MODE_INNER (mode) == innermode)
5319 break;
5320 if (size == 32 && !TARGET_AVX)
5322 static bool warnedavx;
5324 if (cum
5325 && !warnedavx
5326 && cum->warn_avx)
5328 warnedavx = true;
5329 warning (0, "AVX vector argument without AVX "
5330 "enabled changes the ABI");
5332 return TYPE_MODE (type);
5345 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5346 this may not agree with the mode that the type system has chosen for the
5347 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5348 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5351 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5356 if (orig_mode != BLKmode)
5357 tmp = gen_rtx_REG (orig_mode, regno);
5360 tmp = gen_rtx_REG (mode, regno);
5361 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5362 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5368 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5369 of this code is to classify each 8bytes of incoming argument by the register
5370 class and assign registers accordingly. */
5372 /* Return the union class of CLASS1 and CLASS2.
5373 See the x86-64 PS ABI for details. */
5375 static enum x86_64_reg_class
5376 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5378 /* Rule #1: If both classes are equal, this is the resulting class. */
5379 if (class1 == class2)
5380 return class1;
5382 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5383 the other class. */
5384 if (class1 == X86_64_NO_CLASS)
5385 return class2;
5386 if (class2 == X86_64_NO_CLASS)
5387 return class1;
5389 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5390 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5391 return X86_64_MEMORY_CLASS;
5393 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5394 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5395 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5396 return X86_64_INTEGERSI_CLASS;
5397 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5398 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5399 return X86_64_INTEGER_CLASS;
5401 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5402 MEMORY is used. */
5403 if (class1 == X86_64_X87_CLASS
5404 || class1 == X86_64_X87UP_CLASS
5405 || class1 == X86_64_COMPLEX_X87_CLASS
5406 || class2 == X86_64_X87_CLASS
5407 || class2 == X86_64_X87UP_CLASS
5408 || class2 == X86_64_COMPLEX_X87_CLASS)
5409 return X86_64_MEMORY_CLASS;
5411 /* Rule #6: Otherwise class SSE is used. */
5412 return X86_64_SSE_CLASS;
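/* Example (illustrative, not part of the original source): for

     struct s { int i; float f; };   // one 8-byte chunk

   the int half classifies as INTEGERSI and the float half as SSESF;
   rule #4 above merges them to INTEGERSI, so the whole struct is passed
   in a single general-purpose register.  */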
5415 /* Classify the argument of type TYPE and mode MODE.
5416 CLASSES will be filled by the register class used to pass each word
5417 of the operand. The number of words is returned. In case the parameter
5418 should be passed in memory, 0 is returned. As a special case for zero
5419 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5421 BIT_OFFSET is used internally for handling records and specifies the
5422 offset in bits modulo 256 to avoid overflow cases.
5424 See the x86-64 PS ABI for details.
5428 classify_argument (enum machine_mode mode, const_tree type,
5429 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5431 HOST_WIDE_INT bytes =
5432 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5433 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5435 /* Variable sized entities are always passed/returned in memory. */
5436 if (bytes < 0)
5437 return 0;
5439 if (mode != VOIDmode
5440 && targetm.calls.must_pass_in_stack (mode, type))
5441 return 0;
5443 if (type && AGGREGATE_TYPE_P (type))
5447 enum x86_64_reg_class subclasses[MAX_CLASSES];
5449 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5450 if (bytes > 32)
5451 return 0;
5453 for (i = 0; i < words; i++)
5454 classes[i] = X86_64_NO_CLASS;
5456 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5457 signal memory class, so handle it as a special case. */
5460 classes[0] = X86_64_NO_CLASS;
5464 /* Classify each field of record and merge classes. */
5465 switch (TREE_CODE (type))
5467 case RECORD_TYPE:
5468 /* And now merge the fields of the structure. */
5469 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5471 if (TREE_CODE (field) == FIELD_DECL)
5475 if (TREE_TYPE (field) == error_mark_node)
5476 continue;
5478 /* Bitfields are always classified as integer. Handle them
5479 early, since later code would consider them to be
5480 misaligned integers. */
5481 if (DECL_BIT_FIELD (field))
5483 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5484 i < ((int_bit_position (field) + (bit_offset % 64))
5485 + tree_low_cst (DECL_SIZE (field), 0)
5488 merge_classes (X86_64_INTEGER_CLASS,
5495 type = TREE_TYPE (field);
5497 /* Flexible array member is ignored. */
5498 if (TYPE_MODE (type) == BLKmode
5499 && TREE_CODE (type) == ARRAY_TYPE
5500 && TYPE_SIZE (type) == NULL_TREE
5501 && TYPE_DOMAIN (type) != NULL_TREE
5502 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5507 if (!warned && warn_psabi)
5510 inform (input_location,
5511 "The ABI of passing struct with"
5512 " a flexible array member has"
5513 " changed in GCC 4.4");
5517 num = classify_argument (TYPE_MODE (type), type,
5519 (int_bit_position (field)
5520 + bit_offset) % 256);
5523 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5524 for (i = 0; i < num && (i + pos) < words; i++)
5525 classes[i + pos] =
5526 merge_classes (subclasses[i], classes[i + pos]);
5533 /* Arrays are handled as small records. */
5536 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5537 TREE_TYPE (type), subclasses, bit_offset);
5541 /* The partial classes are now full classes. */
5542 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5543 subclasses[0] = X86_64_SSE_CLASS;
5544 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5545 && !((bit_offset % 64) == 0 && bytes == 4))
5546 subclasses[0] = X86_64_INTEGER_CLASS;
5548 for (i = 0; i < words; i++)
5549 classes[i] = subclasses[i % num];
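/* For example (a sketch, assuming a plain LP64 layout): an embedded
   "int a[4]" occupies 16 bytes; its element classifies as
   X86_64_INTEGERSI_CLASS, which the code above widens to
   X86_64_INTEGER_CLASS and replicates, so both words are passed in
   general purpose registers.  */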
5554 case QUAL_UNION_TYPE:
5555 /* Unions are similar to RECORD_TYPE but offset is always 0. */
5557 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5559 if (TREE_CODE (field) == FIELD_DECL)
5563 if (TREE_TYPE (field) == error_mark_node)
5566 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5567 TREE_TYPE (field), subclasses,
5568 bit_offset);
5571 for (i = 0; i < num; i++)
5572 classes[i] = merge_classes (subclasses[i], classes[i]);
5583 /* When size > 16 bytes, if the first one isn't
5584 X86_64_SSE_CLASS or any other ones aren't
5585 X86_64_SSEUP_CLASS, everything should be passed in
5586 memory. */
5587 if (classes[0] != X86_64_SSE_CLASS)
5588 return 0;
5590 for (i = 1; i < words; i++)
5591 if (classes[i] != X86_64_SSEUP_CLASS)
5592 return 0;
5595 /* Final merger cleanup. */
5596 for (i = 0; i < words; i++)
5598 /* If one class is MEMORY, everything should be passed in
5599 memory. */
5600 if (classes[i] == X86_64_MEMORY_CLASS)
5601 return 0;
5603 /* The X86_64_SSEUP_CLASS should be always preceded by
5604 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5605 if (classes[i] == X86_64_SSEUP_CLASS
5606 && classes[i - 1] != X86_64_SSE_CLASS
5607 && classes[i - 1] != X86_64_SSEUP_CLASS)
5609 /* The first one should never be X86_64_SSEUP_CLASS. */
5610 gcc_assert (i != 0);
5611 classes[i] = X86_64_SSE_CLASS;
5614 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5615 everything should be passed in memory. */
5616 if (classes[i] == X86_64_X87UP_CLASS
5617 && (classes[i - 1] != X86_64_X87_CLASS))
5621 /* The first one should never be X86_64_X87UP_CLASS. */
5622 gcc_assert (i != 0);
5623 if (!warned && warn_psabi)
5626 inform (input_location,
5627 "The ABI of passing union with long double"
5628 " has changed in GCC 4.4");
5636 /* Compute the alignment needed. We align all types to their natural
5637 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5638 if (mode != VOIDmode && mode != BLKmode)
5640 int mode_alignment = GET_MODE_BITSIZE (mode);
5642 if (mode == XFmode)
5643 mode_alignment = 128;
5644 else if (mode == XCmode)
5645 mode_alignment = 256;
5646 if (COMPLEX_MODE_P (mode))
5647 mode_alignment /= 2;
5648 /* Misaligned fields are always returned in memory. */
5649 if (bit_offset % mode_alignment)
5653 /* For V1xx modes, just use the base mode. */
5654 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5655 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5656 mode = GET_MODE_INNER (mode);
5658 /* Classification of atomic types. */
5663 classes[0] = X86_64_SSE_CLASS;
5666 classes[0] = X86_64_SSE_CLASS;
5667 classes[1] = X86_64_SSEUP_CLASS;
5677 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5681 classes[0] = X86_64_INTEGERSI_CLASS;
5684 else if (size <= 64)
5686 classes[0] = X86_64_INTEGER_CLASS;
5689 else if (size <= 64+32)
5691 classes[0] = X86_64_INTEGER_CLASS;
5692 classes[1] = X86_64_INTEGERSI_CLASS;
5695 else if (size <= 64+64)
5697 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5705 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5709 /* OImode shouldn't be used directly. */
5714 if (!(bit_offset % 64))
5715 classes[0] = X86_64_SSESF_CLASS;
5716 else
5717 classes[0] = X86_64_SSE_CLASS;
5720 classes[0] = X86_64_SSEDF_CLASS;
5723 classes[0] = X86_64_X87_CLASS;
5724 classes[1] = X86_64_X87UP_CLASS;
5727 classes[0] = X86_64_SSE_CLASS;
5728 classes[1] = X86_64_SSEUP_CLASS;
5731 classes[0] = X86_64_SSE_CLASS;
5732 if (!(bit_offset % 64))
5738 if (!warned && warn_psabi)
5741 inform (input_location,
5742 "The ABI of passing structure with complex float"
5743 " member has changed in GCC 4.4");
5745 classes[1] = X86_64_SSESF_CLASS;
5749 classes[0] = X86_64_SSEDF_CLASS;
5750 classes[1] = X86_64_SSEDF_CLASS;
5753 classes[0] = X86_64_COMPLEX_X87_CLASS;
5756 /* These modes are larger than 16 bytes. */
5764 classes[0] = X86_64_SSE_CLASS;
5765 classes[1] = X86_64_SSEUP_CLASS;
5766 classes[2] = X86_64_SSEUP_CLASS;
5767 classes[3] = X86_64_SSEUP_CLASS;
5775 classes[0] = X86_64_SSE_CLASS;
5776 classes[1] = X86_64_SSEUP_CLASS;
5784 classes[0] = X86_64_SSE_CLASS;
5790 gcc_assert (VECTOR_MODE_P (mode));
5795 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5797 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5798 classes[0] = X86_64_INTEGERSI_CLASS;
5799 else
5800 classes[0] = X86_64_INTEGER_CLASS;
5801 classes[1] = X86_64_INTEGER_CLASS;
5802 return 1 + (bytes > 8);
5806 /* Examine the argument and return the number of registers required in each
5807 class. Return 0 iff the parameter should be passed in memory. */
5809 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5810 int *int_nregs, int *sse_nregs)
5812 enum x86_64_reg_class regclass[MAX_CLASSES];
5813 int n = classify_argument (mode, type, regclass, 0);
5819 for (n--; n >= 0; n--)
5820 switch (regclass[n])
5822 case X86_64_INTEGER_CLASS:
5823 case X86_64_INTEGERSI_CLASS:
5826 case X86_64_SSE_CLASS:
5827 case X86_64_SSESF_CLASS:
5828 case X86_64_SSEDF_CLASS:
5831 case X86_64_NO_CLASS:
5832 case X86_64_SSEUP_CLASS:
5834 case X86_64_X87_CLASS:
5835 case X86_64_X87UP_CLASS:
5839 case X86_64_COMPLEX_X87_CLASS:
5840 return in_return ? 2 : 0;
5841 case X86_64_MEMORY_CLASS:
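/* Example of the counting above (an illustrative assumption): for

     struct s { long l; double d; };

   classify_argument yields { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS },
   so examine_argument reports int_nregs == 1 and sse_nregs == 1; the
   struct is split between one general purpose and one SSE register.  */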
5847 /* Construct container for the argument used by GCC interface. See
5848 FUNCTION_ARG for the detailed description. */
5851 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5852 const_tree type, int in_return, int nintregs, int nsseregs,
5853 const int *intreg, int sse_regno)
5855 /* The following variables hold the static issued_error state. */
5856 static bool issued_sse_arg_error;
5857 static bool issued_sse_ret_error;
5858 static bool issued_x87_ret_error;
5860 enum machine_mode tmpmode;
5861 HOST_WIDE_INT bytes =
5862 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5863 enum x86_64_reg_class regclass[MAX_CLASSES];
5867 int needed_sseregs, needed_intregs;
5868 rtx exp[MAX_CLASSES];
5871 n = classify_argument (mode, type, regclass, 0);
5874 if (!examine_argument (mode, type, in_return, &needed_intregs,
5877 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5880 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5881 some less clueful developer tries to use floating-point anyway. */
5882 if (needed_sseregs && !TARGET_SSE)
5886 if (!issued_sse_ret_error)
5888 error ("SSE register return with SSE disabled");
5889 issued_sse_ret_error = true;
5892 else if (!issued_sse_arg_error)
5894 error ("SSE register argument with SSE disabled");
5895 issued_sse_arg_error = true;
5900 /* Likewise, error if the ABI requires us to return values in the
5901 x87 registers and the user specified -mno-80387. */
5902 if (!TARGET_80387 && in_return)
5903 for (i = 0; i < n; i++)
5904 if (regclass[i] == X86_64_X87_CLASS
5905 || regclass[i] == X86_64_X87UP_CLASS
5906 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5908 if (!issued_x87_ret_error)
5910 error ("x87 register return with x87 disabled");
5911 issued_x87_ret_error = true;
5916 /* First construct simple cases. Avoid SCmode, since we want to use
5917 a single register to pass this type. */
5918 if (n == 1 && mode != SCmode)
5919 switch (regclass[0])
5921 case X86_64_INTEGER_CLASS:
5922 case X86_64_INTEGERSI_CLASS:
5923 return gen_rtx_REG (mode, intreg[0]);
5924 case X86_64_SSE_CLASS:
5925 case X86_64_SSESF_CLASS:
5926 case X86_64_SSEDF_CLASS:
5927 if (mode != BLKmode)
5928 return gen_reg_or_parallel (mode, orig_mode,
5929 SSE_REGNO (sse_regno));
5931 case X86_64_X87_CLASS:
5932 case X86_64_COMPLEX_X87_CLASS:
5933 return gen_rtx_REG (mode, FIRST_STACK_REG);
5934 case X86_64_NO_CLASS:
5935 /* Zero sized array, struct or class. */
5940 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5941 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5942 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5944 && regclass[0] == X86_64_SSE_CLASS
5945 && regclass[1] == X86_64_SSEUP_CLASS
5946 && regclass[2] == X86_64_SSEUP_CLASS
5947 && regclass[3] == X86_64_SSEUP_CLASS
5949 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5952 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5953 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5954 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5955 && regclass[1] == X86_64_INTEGER_CLASS
5956 && (mode == CDImode || mode == TImode || mode == TFmode)
5957 && intreg[0] + 1 == intreg[1])
5958 return gen_rtx_REG (mode, intreg[0]);
5960 /* Otherwise figure out the entries of the PARALLEL. */
5961 for (i = 0; i < n; i++)
5965 switch (regclass[i])
5967 case X86_64_NO_CLASS:
5969 case X86_64_INTEGER_CLASS:
5970 case X86_64_INTEGERSI_CLASS:
5971 /* Merge TImodes on aligned occasions here too. */
5972 if (i * 8 + 8 > bytes)
5973 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5974 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5975 tmpmode = SImode;
5976 else
5977 tmpmode = DImode;
5978 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
5979 if (tmpmode == BLKmode)
5981 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5982 gen_rtx_REG (tmpmode, *intreg),
5986 case X86_64_SSESF_CLASS:
5987 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5988 gen_rtx_REG (SFmode,
5989 SSE_REGNO (sse_regno)),
5993 case X86_64_SSEDF_CLASS:
5994 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5995 gen_rtx_REG (DFmode,
5996 SSE_REGNO (sse_regno)),
6000 case X86_64_SSE_CLASS:
6008 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6018 && regclass[1] == X86_64_SSEUP_CLASS
6019 && regclass[2] == X86_64_SSEUP_CLASS
6020 && regclass[3] == X86_64_SSEUP_CLASS);
6027 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6028 gen_rtx_REG (tmpmode,
6029 SSE_REGNO (sse_regno)),
6038 /* Empty aligned struct, union or class. */
6042 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6043 for (i = 0; i < nexps; i++)
6044 XVECEXP (ret, 0, i) = exp [i];
6048 /* Update the data in CUM to advance over an argument of mode MODE
6049 and data type TYPE. (TYPE is null for libcalls where that information
6050 may not be available.) */
6053 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6054 const_tree type, HOST_WIDE_INT bytes,
6055 HOST_WIDE_INT words)
6071 cum->words += words;
6072 cum->nregs -= words;
6073 cum->regno += words;
6075 if (cum->nregs <= 0)
6083 /* OImode shouldn't be used directly. */
6087 if (cum->float_in_sse < 2)
6090 if (cum->float_in_sse < 1)
6107 if (!type || !AGGREGATE_TYPE_P (type))
6109 cum->sse_words += words;
6110 cum->sse_nregs -= 1;
6111 cum->sse_regno += 1;
6112 if (cum->sse_nregs <= 0)
6126 if (!type || !AGGREGATE_TYPE_P (type))
6128 cum->mmx_words += words;
6129 cum->mmx_nregs -= 1;
6130 cum->mmx_regno += 1;
6131 if (cum->mmx_nregs <= 0)
6142 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6143 const_tree type, HOST_WIDE_INT words, bool named)
6145 int int_nregs, sse_nregs;
6147 /* Unnamed 256bit vector mode parameters are passed on stack. */
6148 if (!named && VALID_AVX256_REG_MODE (mode))
6151 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6152 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6154 cum->nregs -= int_nregs;
6155 cum->sse_nregs -= sse_nregs;
6156 cum->regno += int_nregs;
6157 cum->sse_regno += sse_nregs;
6161 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6162 cum->words = (cum->words + align - 1) & ~(align - 1);
6163 cum->words += words;
6168 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6169 HOST_WIDE_INT words)
6171 /* Otherwise, this should be passed indirectly. */
6172 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6174 cum->words += words;
6182 /* Update the data in CUM to advance over an argument of mode MODE and
6183 data type TYPE. (TYPE is null for libcalls where that information
6184 may not be available.) */
6187 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6188 const_tree type, bool named)
6190 HOST_WIDE_INT bytes, words;
6192 if (mode == BLKmode)
6193 bytes = int_size_in_bytes (type);
6195 bytes = GET_MODE_SIZE (mode);
6196 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6199 mode = type_natural_mode (type, NULL);
6201 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6202 function_arg_advance_ms_64 (cum, bytes, words);
6203 else if (TARGET_64BIT)
6204 function_arg_advance_64 (cum, mode, type, words, named);
6206 function_arg_advance_32 (cum, mode, type, bytes, words);
6209 /* Define where to put the arguments to a function.
6210 Value is zero to push the argument on the stack,
6211 or a hard register in which to store the argument.
6213 MODE is the argument's machine mode.
6214 TYPE is the data type of the argument (as a tree).
6215 This is null for libcalls where that information may
6217 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6218 the preceding args and about the function being called.
6219 NAMED is nonzero if this argument is a named parameter
6220 (otherwise it is an extra parameter matching an ellipsis). */
6223 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6224 enum machine_mode orig_mode, const_tree type,
6225 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6227 static bool warnedsse, warnedmmx;
6229 /* Avoid the AL settings for the Unix64 ABI. */
6230 if (mode == VOIDmode)
6246 if (words <= cum->nregs)
6248 int regno = cum->regno;
6250 /* Fastcall allocates the first two DWORD (SImode) or
6251 smaller arguments to ECX and EDX if it isn't an
6252 aggregate type. */
6253 if (cum->fastcall)
6254 {
6255 if (mode == BLKmode
6256 || mode == DImode
6257 || (type && AGGREGATE_TYPE_P (type)))
6260 /* ECX, not EAX, is the first allocated register. */
6261 if (regno == AX_REG)
6264 return gen_rtx_REG (mode, regno);
6269 if (cum->float_in_sse < 2)
6272 if (cum->float_in_sse < 1)
6276 /* In 32bit, we pass TImode in xmm registers. */
6283 if (!type || !AGGREGATE_TYPE_P (type))
6285 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6288 warning (0, "SSE vector argument without SSE enabled "
6289 "changes the ABI");
6292 return gen_reg_or_parallel (mode, orig_mode,
6293 cum->sse_regno + FIRST_SSE_REG);
6298 /* OImode shouldn't be used directly. */
6307 if (!type || !AGGREGATE_TYPE_P (type))
6310 return gen_reg_or_parallel (mode, orig_mode,
6311 cum->sse_regno + FIRST_SSE_REG);
6321 if (!type || !AGGREGATE_TYPE_P (type))
6323 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6326 warning (0, "MMX vector argument without MMX enabled "
6327 "changes the ABI");
6330 return gen_reg_or_parallel (mode, orig_mode,
6331 cum->mmx_regno + FIRST_MMX_REG);
6340 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6341 enum machine_mode orig_mode, const_tree type, bool named)
6343 /* Handle a hidden AL argument containing number of registers
6344 for varargs x86-64 functions. */
6345 if (mode == VOIDmode)
6346 return GEN_INT (cum->maybe_vaarg
6347 ? (cum->sse_nregs < 0
6348 ? X86_64_SSE_REGPARM_MAX
6363 /* Unnamed 256bit vector mode parameters are passed on stack. */
6369 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6371 &x86_64_int_parameter_registers [cum->regno],
6376 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6377 enum machine_mode orig_mode, bool named,
6378 HOST_WIDE_INT bytes)
6382 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6383 We use the value -2 to specify that the current function call is MSABI. */
6384 if (mode == VOIDmode)
6385 return GEN_INT (-2);
6387 /* If we've run out of registers, it goes on the stack. */
6388 if (cum->nregs == 0)
6391 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6393 /* Only floating point modes are passed in anything but integer regs. */
6394 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6397 regno = cum->regno + FIRST_SSE_REG;
6402 /* Unnamed floating parameters are passed in both the
6403 SSE and integer registers. */
6404 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6405 t2 = gen_rtx_REG (mode, regno);
6406 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6407 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6408 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6411 /* Handle aggregate types passed in registers. */
6412 if (orig_mode == BLKmode)
6414 if (bytes > 0 && bytes <= 8)
6415 mode = (bytes > 4 ? DImode : SImode);
6416 if (mode == BLKmode)
6420 return gen_reg_or_parallel (mode, orig_mode, regno);
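/* For instance (an illustrative sketch of the Microsoft x64 convention as
   handled above): an 8-byte struct is loaded into the slot's integer
   register as DImode, and a 1-, 2- or 4-byte struct as SImode; aggregates
   of any other size never reach this point, because
   ix86_pass_by_reference below has already replaced them with a
   pointer.  */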
6423 /* Return where to put the arguments to a function.
6424 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6426 MODE is the argument's machine mode. TYPE is the data type of the
6427 argument. It is null for libcalls where that information may not be
6428 available. CUM gives information about the preceding args and about
6429 the function being called. NAMED is nonzero if this argument is a
6430 named parameter (otherwise it is an extra parameter matching an
6434 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6435 const_tree type, bool named)
6437 enum machine_mode mode = omode;
6438 HOST_WIDE_INT bytes, words;
6440 if (mode == BLKmode)
6441 bytes = int_size_in_bytes (type);
6443 bytes = GET_MODE_SIZE (mode);
6444 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6446 /* To simplify the code below, represent vector types with a vector mode
6447 even if MMX/SSE are not active. */
6448 if (type && TREE_CODE (type) == VECTOR_TYPE)
6449 mode = type_natural_mode (type, cum);
6451 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6452 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6453 else if (TARGET_64BIT)
6454 return function_arg_64 (cum, mode, omode, type, named);
6456 return function_arg_32 (cum, mode, omode, type, bytes, words);
6459 /* A C expression that indicates when an argument must be passed by
6460 reference. If nonzero for an argument, a copy of that argument is
6461 made in memory and a pointer to the argument is passed instead of
6462 the argument itself. The pointer is passed in whatever way is
6463 appropriate for passing a pointer to that type. */
6466 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6467 enum machine_mode mode ATTRIBUTE_UNUSED,
6468 const_tree type, bool named ATTRIBUTE_UNUSED)
6470 /* See Windows x64 Software Convention. */
6471 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6473 int msize = (int) GET_MODE_SIZE (mode);
6476 /* Arrays are passed by reference. */
6477 if (TREE_CODE (type) == ARRAY_TYPE)
6480 if (AGGREGATE_TYPE_P (type))
6482 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6483 are passed by reference. */
6484 msize = int_size_in_bytes (type);
6488 /* __m128 is passed by reference. */
6489 switch (msize) {
6490 case 1: case 2: case 4: case 8:
6496 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
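/* Concrete cases (illustrative): under the Microsoft x64 ABI a 12-byte
   struct, a 16-byte __m128 and any array are all passed by reference,
   while structs of exactly 1, 2, 4 or 8 bytes stay in registers;
   variable sized types are passed by reference on both 64-bit ABIs.  */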
6502 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6503 ABI. */
6505 contains_aligned_value_p (const_tree type)
6507 enum machine_mode mode = TYPE_MODE (type);
6508 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6512 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6513 return true;
6514 if (TYPE_ALIGN (type) < 128)
6515 return false;
6517 if (AGGREGATE_TYPE_P (type))
6519 /* Walk the aggregates recursively. */
6520 switch (TREE_CODE (type))
6524 case QUAL_UNION_TYPE:
6528 /* Walk all the structure fields. */
6529 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6531 if (TREE_CODE (field) == FIELD_DECL
6532 && contains_aligned_value_p (TREE_TYPE (field)))
6539 /* Just for use if some language passes arrays by value. */
6540 if (contains_aligned_value_p (TREE_TYPE (type)))
6551 /* Gives the alignment boundary, in bits, of an argument with the
6552 specified mode and type. */
6555 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6560 /* Since the main variant type is used for call, we convert it to
6561 the main variant type. */
6562 type = TYPE_MAIN_VARIANT (type);
6563 align = TYPE_ALIGN (type);
6566 align = GET_MODE_ALIGNMENT (mode);
6567 if (align < PARM_BOUNDARY)
6568 align = PARM_BOUNDARY;
6569 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6570 natural boundaries. */
6571 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6573 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6574 make an exception for SSE modes since these require 128bit
6575 alignment.
6577 The handling here differs from field_alignment. ICC aligns MMX
6578 arguments to 4 byte boundaries, while structure fields are aligned
6579 to 8 byte boundaries. */
6582 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6583 align = PARM_BOUNDARY;
6587 if (!contains_aligned_value_p (type))
6588 align = PARM_BOUNDARY;
6591 if (align > BIGGEST_ALIGNMENT)
6592 align = BIGGEST_ALIGNMENT;
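/* Sample results (a sketch, assuming the ia32 PARM_BOUNDARY of 32 bits):
   a double argument keeps the 4-byte stack alignment on ia32, a __m128
   or an aggregate containing one gets 128 bits, and on x86-64 every
   argument gets at least its natural alignment, never below
   PARM_BOUNDARY.  */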
6596 /* Return true if N is a possible register number of function value. */
6599 ix86_function_value_regno_p (const unsigned int regno)
6606 case FIRST_FLOAT_REG:
6607 /* TODO: The function should depend on current function ABI but
6608 builtins.c would need updating then. Therefore we use the
6609 default ABI. */
6610 if (TARGET_64BIT && ix86_abi == MS_ABI)
6612 return TARGET_FLOAT_RETURNS_IN_80387;
6618 if (TARGET_MACHO || TARGET_64BIT)
6626 /* Define how to find the value returned by a function.
6627 VALTYPE is the data type of the value (as a tree).
6628 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6629 otherwise, FUNC is 0. */
6632 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6633 const_tree fntype, const_tree fn)
6637 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6638 we normally prevent this case when mmx is not available. However
6639 some ABIs may require the result to be returned like DImode. */
6640 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6641 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6643 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6644 we prevent this case when sse is not available. However some ABIs
6645 may require the result to be returned like integer TImode. */
6646 else if (mode == TImode
6647 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6648 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6650 /* 32-byte vector modes in %ymm0. */
6651 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6652 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6654 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6655 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6656 regno = FIRST_FLOAT_REG;
6658 /* Most things go in %eax. */
6661 /* Override FP return register with %xmm0 for local functions when
6662 SSE math is enabled or for functions with sseregparm attribute. */
6663 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6665 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6666 if ((sse_level >= 1 && mode == SFmode)
6667 || (sse_level == 2 && mode == DFmode))
6668 regno = FIRST_SSE_REG;
6671 /* OImode shouldn't be used directly. */
6672 gcc_assert (mode != OImode);
6674 return gen_rtx_REG (orig_mode, regno);
6678 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6683 /* Handle libcalls, which don't provide a type node. */
6684 if (valtype == NULL)
6696 return gen_rtx_REG (mode, FIRST_SSE_REG);
6699 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6703 return gen_rtx_REG (mode, AX_REG);
6707 ret = construct_container (mode, orig_mode, valtype, 1,
6708 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6709 x86_64_int_return_registers, 0);
6711 /* For zero sized structures, construct_container returns NULL, but we
6712 need to keep the rest of the compiler happy by returning a meaningful value. */
6714 ret = gen_rtx_REG (orig_mode, AX_REG);
6720 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6722 unsigned int regno = AX_REG;
6726 switch (GET_MODE_SIZE (mode))
6728 case 16:
6729 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6730 && !COMPLEX_MODE_P (mode))
6731 regno = FIRST_SSE_REG;
6733 case 8:
6734 case 4:
6735 if (mode == SFmode || mode == DFmode)
6736 regno = FIRST_SSE_REG;
6742 return gen_rtx_REG (orig_mode, regno);
6746 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6747 enum machine_mode orig_mode, enum machine_mode mode)
6749 const_tree fn, fntype;
6752 if (fntype_or_decl && DECL_P (fntype_or_decl))
6753 fn = fntype_or_decl;
6754 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6756 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6757 return function_value_ms_64 (orig_mode, mode);
6758 else if (TARGET_64BIT)
6759 return function_value_64 (orig_mode, mode, valtype);
6761 return function_value_32 (orig_mode, mode, fntype, fn);
6765 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6766 bool outgoing ATTRIBUTE_UNUSED)
6768 enum machine_mode mode, orig_mode;
6770 orig_mode = TYPE_MODE (valtype);
6771 mode = type_natural_mode (valtype, NULL);
6772 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6776 ix86_libcall_value (enum machine_mode mode)
6778 return ix86_function_value_1 (NULL, NULL, mode, mode);
6781 /* Return true iff type is returned in memory. */
6783 static int ATTRIBUTE_UNUSED
6784 return_in_memory_32 (const_tree type, enum machine_mode mode)
6788 if (mode == BLKmode)
6789 return 1;
6791 size = int_size_in_bytes (type);
6793 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6796 if (VECTOR_MODE_P (mode) || mode == TImode)
6798 /* User-created vectors small enough to fit in EAX. */
6799 if (size < 8)
6800 return 0;
6802 /* MMX/3dNow values are returned in MM0,
6803 except when it doesn't exist. */
6804 if (size == 8)
6805 return (TARGET_MMX ? 0 : 1);
6807 /* SSE values are returned in XMM0, except when it doesn't exist. */
6808 if (size == 16)
6809 return (TARGET_SSE ? 0 : 1);
6811 /* AVX values are returned in YMM0, except when it doesn't exist. */
6812 if (size == 32)
6813 return TARGET_AVX ? 0 : 1;
6822 /* OImode shouldn't be used directly. */
6823 gcc_assert (mode != OImode);
6828 static int ATTRIBUTE_UNUSED
6829 return_in_memory_64 (const_tree type, enum machine_mode mode)
6831 int needed_intregs, needed_sseregs;
6832 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6835 static int ATTRIBUTE_UNUSED
6836 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6838 HOST_WIDE_INT size = int_size_in_bytes (type);
6840 /* __m128 is returned in xmm0. */
6841 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6842 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6845 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6846 return (size != 1 && size != 2 && size != 4 && size != 8);
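/* E.g. (illustrative): a 16-byte __m128 comes back in XMM0, an 8-byte
   struct in RAX, and a 12-byte struct via a hidden pointer, matching the
   by-value sizes the Microsoft convention allows.  */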
6850 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6852 #ifdef SUBTARGET_RETURN_IN_MEMORY
6853 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6855 const enum machine_mode mode = type_natural_mode (type, NULL);
6859 if (ix86_function_type_abi (fntype) == MS_ABI)
6860 return return_in_memory_ms_64 (type, mode);
6862 return return_in_memory_64 (type, mode);
6865 return return_in_memory_32 (type, mode);
6869 /* Return false iff TYPE is returned in memory. This version is used
6870 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6871 but differs notably in that when MMX is available, 8-byte vectors
6872 are returned in memory, rather than in MMX registers. */
6875 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6878 enum machine_mode mode = type_natural_mode (type, NULL);
6881 return return_in_memory_64 (type, mode);
6883 if (mode == BLKmode)
6886 size = int_size_in_bytes (type);
6888 if (VECTOR_MODE_P (mode))
6890 /* Return in memory only if MMX registers *are* available. This
6891 seems backwards, but it is consistent with the existing
6892 Solaris x86 ABI. */
6898 else if (mode == TImode)
6900 else if (mode == XFmode)
6906 /* When returning SSE vector types, we have a choice of either
6907 (1) being abi incompatible with a -march switch, or
6908 (2) generating an error.
6909 Given no good solution, I think the safest thing is one warning.
6910 The user won't be able to use -Werror, but....
6912 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6913 called in response to actually generating a caller or callee that
6914 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6915 via aggregate_value_p for general type probing from tree-ssa. */
6918 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6920 static bool warnedsse, warnedmmx;
6922 if (!TARGET_64BIT && type)
6924 /* Look at the return type of the function, not the function type. */
6925 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6927 if (!TARGET_SSE && !warnedsse)
6929 if (mode == TImode
6930 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6933 warning (0, "SSE vector return without SSE enabled "
6934 "changes the ABI");
6938 if (!TARGET_MMX && !warnedmmx)
6940 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6943 warning (0, "MMX vector return without MMX enabled "
6944 "changes the ABI");
6953 /* Create the va_list data type. */
6955 /* Returns the calling convention specific va_list data type.
6956 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6959 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6961 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6963 /* For i386 we use plain pointer to argument area. */
6964 if (!TARGET_64BIT || abi == MS_ABI)
6965 return build_pointer_type (char_type_node);
6967 record = lang_hooks.types.make_type (RECORD_TYPE);
6968 type_decl = build_decl (BUILTINS_LOCATION,
6969 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6971 f_gpr = build_decl (BUILTINS_LOCATION,
6972 FIELD_DECL, get_identifier ("gp_offset"),
6973 unsigned_type_node);
6974 f_fpr = build_decl (BUILTINS_LOCATION,
6975 FIELD_DECL, get_identifier ("fp_offset"),
6976 unsigned_type_node);
6977 f_ovf = build_decl (BUILTINS_LOCATION,
6978 FIELD_DECL, get_identifier ("overflow_arg_area"),
6980 f_sav = build_decl (BUILTINS_LOCATION,
6981 FIELD_DECL, get_identifier ("reg_save_area"),
6984 va_list_gpr_counter_field = f_gpr;
6985 va_list_fpr_counter_field = f_fpr;
6987 DECL_FIELD_CONTEXT (f_gpr) = record;
6988 DECL_FIELD_CONTEXT (f_fpr) = record;
6989 DECL_FIELD_CONTEXT (f_ovf) = record;
6990 DECL_FIELD_CONTEXT (f_sav) = record;
6992 TREE_CHAIN (record) = type_decl;
6993 TYPE_NAME (record) = type_decl;
6994 TYPE_FIELDS (record) = f_gpr;
6995 DECL_CHAIN (f_gpr) = f_fpr;
6996 DECL_CHAIN (f_fpr) = f_ovf;
6997 DECL_CHAIN (f_ovf) = f_sav;
6999 layout_type (record);
7001 /* The correct type is an array type of one element. */
7002 return build_array_type (record, build_index_type (size_zero_node));
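/* At the C level, the record built above corresponds to the familiar
   SysV layout (shown for reference only; the field order is exactly
   what the code above creates):

     typedef struct __va_list_tag {
       unsigned int gp_offset;     // byte offset into reg_save_area for GPRs
       unsigned int fp_offset;     // byte offset into reg_save_area for XMMs
       void *overflow_arg_area;    // next stack-passed argument
       void *reg_save_area;        // start of the register save area
     } __builtin_va_list[1];
*/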
7005 /* Setup the builtin va_list data type and for 64-bit the additional
7006 calling convention specific va_list data types. */
7009 ix86_build_builtin_va_list (void)
7011 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7013 /* Initialize abi specific va_list builtin types. */
7017 if (ix86_abi == MS_ABI)
7019 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7020 if (TREE_CODE (t) != RECORD_TYPE)
7021 t = build_variant_type_copy (t);
7022 sysv_va_list_type_node = t;
7027 if (TREE_CODE (t) != RECORD_TYPE)
7028 t = build_variant_type_copy (t);
7029 sysv_va_list_type_node = t;
7031 if (ix86_abi != MS_ABI)
7033 t = ix86_build_builtin_va_list_abi (MS_ABI);
7034 if (TREE_CODE (t) != RECORD_TYPE)
7035 t = build_variant_type_copy (t);
7036 ms_va_list_type_node = t;
7041 if (TREE_CODE (t) != RECORD_TYPE)
7042 t = build_variant_type_copy (t);
7043 ms_va_list_type_node = t;
7050 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7053 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7059 /* GPR size of varargs save area. */
7060 if (cfun->va_list_gpr_size)
7061 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7063 ix86_varargs_gpr_size = 0;
7065 /* FPR size of varargs save area. We don't need it if we don't pass
7066 anything in SSE registers. */
7067 if (TARGET_SSE && cfun->va_list_fpr_size)
7068 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7070 ix86_varargs_fpr_size = 0;
7072 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7075 save_area = frame_pointer_rtx;
7076 set = get_varargs_alias_set ();
7078 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7079 if (max > X86_64_REGPARM_MAX)
7080 max = X86_64_REGPARM_MAX;
7082 for (i = cum->regno; i < max; i++)
7084 mem = gen_rtx_MEM (Pmode,
7085 plus_constant (save_area, i * UNITS_PER_WORD));
7086 MEM_NOTRAP_P (mem) = 1;
7087 set_mem_alias_set (mem, set);
7088 emit_move_insn (mem, gen_rtx_REG (Pmode,
7089 x86_64_int_parameter_registers[i]));
7092 if (ix86_varargs_fpr_size)
7094 enum machine_mode smode;
7097 /* Now emit code to save SSE registers. The AX parameter contains number
7098 of SSE parameter registers used to call this function, though all we
7099 actually check here is the zero/non-zero status. */
7101 label = gen_label_rtx ();
7102 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7103 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7106 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7107 we used movdqa (i.e. TImode) instead? Perhaps even better would
7108 be if we could determine the real mode of the data, via a hook
7109 into pass_stdarg. Ignore all that for now. */
7111 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7112 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7114 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7115 if (max > X86_64_SSE_REGPARM_MAX)
7116 max = X86_64_SSE_REGPARM_MAX;
7118 for (i = cum->sse_regno; i < max; ++i)
7120 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7121 mem = gen_rtx_MEM (smode, mem);
7122 MEM_NOTRAP_P (mem) = 1;
7123 set_mem_alias_set (mem, set);
7124 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7126 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
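/* The resulting save area layout (a sketch; offsets assume the full
   X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8):

     bytes   0 ..  47   rdi, rsi, rdx, rcx, r8, r9   (8 bytes each)
     bytes  48 .. 175   xmm0 .. xmm7                 (16 bytes each)

   gp_offset and fp_offset in the va_list index into this block.  */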
7134 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7136 alias_set_type set = get_varargs_alias_set ();
7139 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7143 mem = gen_rtx_MEM (Pmode,
7144 plus_constant (virtual_incoming_args_rtx,
7145 i * UNITS_PER_WORD));
7146 MEM_NOTRAP_P (mem) = 1;
7147 set_mem_alias_set (mem, set);
7149 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7150 emit_move_insn (mem, reg);
7155 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7156 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7159 CUMULATIVE_ARGS next_cum;
7162 /* This argument doesn't appear to be used anymore. Which is good,
7163 because the old code here didn't suppress rtl generation. */
7164 gcc_assert (!no_rtl);
7169 fntype = TREE_TYPE (current_function_decl);
7171 /* For varargs, we do not want to skip the dummy va_dcl argument.
7172 For stdargs, we do want to skip the last named argument. */
7174 if (stdarg_p (fntype))
7175 ix86_function_arg_advance (&next_cum, mode, type, true);
7177 if (cum->call_abi == MS_ABI)
7178 setup_incoming_varargs_ms_64 (&next_cum);
7180 setup_incoming_varargs_64 (&next_cum);
7183 /* Checks whether TYPE is a char * style va_list. */
7186 is_va_list_char_pointer (tree type)
7190 /* For 32-bit it is always true. */
7193 canonic = ix86_canonical_va_list_type (type);
7194 return (canonic == ms_va_list_type_node
7195 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7198 /* Implement va_start. */
7201 ix86_va_start (tree valist, rtx nextarg)
7203 HOST_WIDE_INT words, n_gpr, n_fpr;
7204 tree f_gpr, f_fpr, f_ovf, f_sav;
7205 tree gpr, fpr, ovf, sav, t;
7208 /* Only the 64-bit target needs something special. */
7209 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7211 std_expand_builtin_va_start (valist, nextarg);
7215 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7216 f_fpr = DECL_CHAIN (f_gpr);
7217 f_ovf = DECL_CHAIN (f_fpr);
7218 f_sav = DECL_CHAIN (f_ovf);
7220 valist = build_simple_mem_ref (valist);
7221 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7222 /* The following should be folded into the MEM_REF offset. */
7223 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7225 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7227 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7229 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7232 /* Count number of gp and fp argument registers used. */
7233 words = crtl->args.info.words;
7234 n_gpr = crtl->args.info.regno;
7235 n_fpr = crtl->args.info.sse_regno;
7237 if (cfun->va_list_gpr_size)
7239 type = TREE_TYPE (gpr);
7240 t = build2 (MODIFY_EXPR, type,
7241 gpr, build_int_cst (type, n_gpr * 8));
7242 TREE_SIDE_EFFECTS (t) = 1;
7243 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7246 if (TARGET_SSE && cfun->va_list_fpr_size)
7248 type = TREE_TYPE (fpr);
7249 t = build2 (MODIFY_EXPR, type, fpr,
7250 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7251 TREE_SIDE_EFFECTS (t) = 1;
7252 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7255 /* Find the overflow area. */
7256 type = TREE_TYPE (ovf);
7257 t = make_tree (type, crtl->args.internal_arg_pointer);
7259 t = build2 (POINTER_PLUS_EXPR, type, t,
7260 size_int (words * UNITS_PER_WORD));
7261 t = build2 (MODIFY_EXPR, type, ovf, t);
7262 TREE_SIDE_EFFECTS (t) = 1;
7263 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7265 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7267 /* Find the register save area.
7268 The function prologue saves it right above the stack frame. */
7269 type = TREE_TYPE (sav);
7270 t = make_tree (type, frame_pointer_rtx);
7271 if (!ix86_varargs_gpr_size)
7272 t = build2 (POINTER_PLUS_EXPR, type, t,
7273 size_int (-8 * X86_64_REGPARM_MAX));
7274 t = build2 (MODIFY_EXPR, type, sav, t);
7275 TREE_SIDE_EFFECTS (t) = 1;
7276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
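/* Worked example (an assumed signature): for

     int f (int a, double b, ...)

   one GPR and one SSE register are consumed by the named arguments, so
   va_start stores gp_offset = 1*8 = 8 and
   fp_offset = 8*X86_64_REGPARM_MAX + 1*16 = 64.  */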
7280 /* Implement va_arg. */
7283 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7286 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7287 tree f_gpr, f_fpr, f_ovf, f_sav;
7288 tree gpr, fpr, ovf, sav, t;
7290 tree lab_false, lab_over = NULL_TREE;
7295 enum machine_mode nat_mode;
7296 unsigned int arg_boundary;
7298 /* Only the 64-bit target needs something special. */
7299 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7300 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7302 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7303 f_fpr = DECL_CHAIN (f_gpr);
7304 f_ovf = DECL_CHAIN (f_fpr);
7305 f_sav = DECL_CHAIN (f_ovf);
7307 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7308 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7309 valist = build_va_arg_indirect_ref (valist);
7310 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7311 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7312 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7314 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7316 type = build_pointer_type (type);
7317 size = int_size_in_bytes (type);
7318 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7320 nat_mode = type_natural_mode (type, NULL);
7329 /* Unnamed 256bit vector mode parameters are passed on stack. */
7330 if (ix86_cfun_abi () == SYSV_ABI)
7337 container = construct_container (nat_mode, TYPE_MODE (type),
7338 type, 0, X86_64_REGPARM_MAX,
7339 X86_64_SSE_REGPARM_MAX, intreg,
7344 /* Pull the value out of the saved registers. */
7346 addr = create_tmp_var (ptr_type_node, "addr");
7350 int needed_intregs, needed_sseregs;
7352 tree int_addr, sse_addr;
7354 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7355 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7357 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7359 need_temp = (!REG_P (container)
7360 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7361 || TYPE_ALIGN (type) > 128));
7363 /* In case we are passing a structure, verify that it is a consecutive block
7364 on the register save area. If not, we need to do moves. */
7365 if (!need_temp && !REG_P (container))
7367 /* Verify that all registers are strictly consecutive. */
7368 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7372 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7374 rtx slot = XVECEXP (container, 0, i);
7375 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7376 || INTVAL (XEXP (slot, 1)) != i * 16)
7384 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7386 rtx slot = XVECEXP (container, 0, i);
7387 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7388 || INTVAL (XEXP (slot, 1)) != i * 8)
7400 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7401 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7404 /* First ensure that we fit completely in registers. */
7407 t = build_int_cst (TREE_TYPE (gpr),
7408 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7409 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7410 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7411 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7412 gimplify_and_add (t, pre_p);
7416 t = build_int_cst (TREE_TYPE (fpr),
7417 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7418 + X86_64_REGPARM_MAX * 8);
7419 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7420 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7421 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7422 gimplify_and_add (t, pre_p);
7425 /* Compute index to start of area used for integer regs. */
7428 /* int_addr = gpr + sav; */
7429 t = fold_convert (sizetype, gpr);
7430 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7431 gimplify_assign (int_addr, t, pre_p);
7435 /* sse_addr = fpr + sav; */
7436 t = fold_convert (sizetype, fpr);
7437 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7438 gimplify_assign (sse_addr, t, pre_p);
7442 int i, prev_size = 0;
7443 tree temp = create_tmp_var (type, "va_arg_tmp");
7446 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7447 gimplify_assign (addr, t, pre_p);
7449 for (i = 0; i < XVECLEN (container, 0); i++)
7451 rtx slot = XVECEXP (container, 0, i);
7452 rtx reg = XEXP (slot, 0);
7453 enum machine_mode mode = GET_MODE (reg);
7459 tree dest_addr, dest;
7460 int cur_size = GET_MODE_SIZE (mode);
7462 if (prev_size + cur_size > size)
7464 cur_size = size - prev_size;
7465 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7466 if (mode == BLKmode)
7469 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7470 if (mode == GET_MODE (reg))
7471 addr_type = build_pointer_type (piece_type);
7473 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7475 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7478 if (SSE_REGNO_P (REGNO (reg)))
7480 src_addr = sse_addr;
7481 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7485 src_addr = int_addr;
7486 src_offset = REGNO (reg) * 8;
7488 src_addr = fold_convert (addr_type, src_addr);
7489 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7490 size_int (src_offset));
7492 dest_addr = fold_convert (daddr_type, addr);
7493 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7494 size_int (INTVAL (XEXP (slot, 1))));
7495 if (cur_size == GET_MODE_SIZE (mode))
7497 src = build_va_arg_indirect_ref (src_addr);
7498 dest = build_va_arg_indirect_ref (dest_addr);
7500 gimplify_assign (dest, src, pre_p);
7505 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7506 3, dest_addr, src_addr,
7507 size_int (cur_size));
7508 gimplify_and_add (copy, pre_p);
7510 prev_size += cur_size;
7516 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7517 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7518 gimplify_assign (gpr, t, pre_p);
7523 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7524 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7525 gimplify_assign (fpr, t, pre_p);
7528 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7530 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7533 /* ... otherwise out of the overflow area. */
7535 /* When we align a parameter on the stack for the caller, if the parameter
7536 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7537 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match the callee
7538 here with the caller. */
7539 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7540 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7541 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7543 /* Care for on-stack alignment if needed. */
7544 if (arg_boundary <= 64 || size == 0)
7548 HOST_WIDE_INT align = arg_boundary / 8;
7549 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7550 size_int (align - 1));
7551 t = fold_convert (sizetype, t);
7552 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7553 size_int (-align));
7554 t = fold_convert (TREE_TYPE (ovf), t);
7557 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7558 gimplify_assign (addr, t, pre_p);
7560 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7561 size_int (rsize * UNITS_PER_WORD));
7562 gimplify_assign (unshare_expr (ovf), t, pre_p);
7565 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7567 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7568 addr = fold_convert (ptrtype, addr);
7571 addr = build_va_arg_indirect_ref (addr);
7572 return build_va_arg_indirect_ref (addr);
7575 /* Return nonzero if OPNUM's MEM should be matched
7576 in movabs* patterns. */
7579 ix86_check_movabs (rtx insn, int opnum)
7583 set = PATTERN (insn);
7584 if (GET_CODE (set) == PARALLEL)
7585 set = XVECEXP (set, 0, 0);
7586 gcc_assert (GET_CODE (set) == SET);
7587 mem = XEXP (set, opnum);
7588 while (GET_CODE (mem) == SUBREG)
7589 mem = SUBREG_REG (mem);
7590 gcc_assert (MEM_P (mem));
7591 return (volatile_ok || !MEM_VOLATILE_P (mem));
7594 /* Initialize the table of extra 80387 mathematical constants. */
7597 init_ext_80387_constants (void)
7599 static const char * cst[5] =
7601 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7602 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7603 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7604 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7605 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7609 for (i = 0; i < 5; i++)
7611 real_from_string (&ext_80387_constants_table[i], cst[i]);
7612 /* Ensure each constant is rounded to XFmode precision. */
7613 real_convert (&ext_80387_constants_table[i],
7614 XFmode, &ext_80387_constants_table[i]);
7617 ext_80387_constants_init = 1;
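/* Usage sketch (illustrative): with these table entries,
   standard_80387_constant_p recognizes e.g. the value of pi in an XFmode
   CONST_DOUBLE, and standard_80387_constant_opcode then emits the shorter
   "fldpi" instead of a load from the constant pool, when optimizing for
   size or when TARGET_EXT_80387_CONSTANTS holds.  */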
7620 /* Return true if the constant is something that can be loaded with
7621 a special instruction. */
7624 standard_80387_constant_p (rtx x)
7626 enum machine_mode mode = GET_MODE (x);
7630 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7633 if (x == CONST0_RTX (mode))
7634 return 1;
7635 if (x == CONST1_RTX (mode))
7636 return 2;
7638 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7640 /* For XFmode constants, try to find a special 80387 instruction when
7641 optimizing for size or on those CPUs that benefit from them. */
7643 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7647 if (! ext_80387_constants_init)
7648 init_ext_80387_constants ();
7650 for (i = 0; i < 5; i++)
7651 if (real_identical (&r, &ext_80387_constants_table[i]))
7652 return i + 3;
7655 /* A load of the constant -0.0 or -1.0 will be split into an
7656 fldz;fchs or fld1;fchs sequence. */
7657 if (real_isnegzero (&r))
7658 return 8;
7659 if (real_identical (&r, &dconstm1))
7660 return 9;
7665 /* Return the opcode of the special instruction to be used to load
7669 standard_80387_constant_opcode (rtx x)
7671 switch (standard_80387_constant_p (x))
7695 /* Return the CONST_DOUBLE representing the 80387 constant that is
7696 loaded by the specified special instruction. The argument IDX
7697 matches the return value from standard_80387_constant_p. */
7700 standard_80387_constant_rtx (int idx)
7704 if (! ext_80387_constants_init)
7705 init_ext_80387_constants ();
7721 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7725 /* Return 1 if X is all 0s, and 2 if X is all 1s,
7726 in a supported SSE vector mode. */
7729 standard_sse_constant_p (rtx x)
7731 enum machine_mode mode = GET_MODE (x);
7733 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7734 return 1;
7735 if (vector_all_ones_operand (x, mode))
7751 /* Return the opcode of the special instruction to be used to load
7755 standard_sse_constant_opcode (rtx insn, rtx x)
7757 switch (standard_sse_constant_p (x))
7760 switch (get_attr_mode (insn))
7763 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7765 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7766 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7768 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7770 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7771 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7773 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7775 return "vxorps\t%x0, %x0, %x0";
7777 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7778 return "vxorps\t%x0, %x0, %x0";
7780 return "vxorpd\t%x0, %x0, %x0";
7782 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7783 return "vxorps\t%x0, %x0, %x0";
7785 return "vpxor\t%x0, %x0, %x0";
7790 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
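/* These are the classic idioms (noted here for clarity): xorps/xorpd/pxor
   of a register with itself materializes all-zero bits without a memory
   load, and pcmpeqd of a register with itself yields all-one bits,
   matching the two constant classes standard_sse_constant_p accepts.  */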
7797 /* Returns 1 if OP contains a symbol reference. */
7800 symbolic_reference_mentioned_p (rtx op)
7805 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7808 fmt = GET_RTX_FORMAT (GET_CODE (op));
7809 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7815 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7816 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7820 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7827 /* Return 1 if it is appropriate to emit `ret' instructions in the
7828 body of a function. Do this only if the epilogue is simple, needing a
7829 couple of insns. Prior to reloading, we can't tell how many registers
7830 must be saved, so return 0 then. Return 0 if there is no frame
7831 marker to de-allocate. */
7834 ix86_can_use_return_insn_p (void)
7836 struct ix86_frame frame;
7838 if (! reload_completed || frame_pointer_needed)
7839 return 0;
7841 /* Don't allow more than 32k bytes of popped arguments, since that's
7842 all we can do with one instruction. */
7843 if (crtl->args.pops_args
7844 && crtl->args.size >= 32768)
7845 return 0;
7847 ix86_compute_frame_layout (&frame);
7848 return frame.to_allocate == 0 && frame.padding0 == 0
7849 && (frame.nregs + frame.nsseregs) == 0;
7852 /* Value should be nonzero if functions must have frame pointers.
7853 Zero means the frame pointer need not be set up (and parms may
7854 be accessed via the stack pointer) in functions that seem suitable. */
7857 ix86_frame_pointer_required (void)
7859 /* If we accessed previous frames, then the generated code expects
7860 to be able to access the saved ebp value in our frame. */
7861 if (cfun->machine->accesses_prev_frame)
7862 return 1;
7864 /* Several x86 OSes need a frame pointer for other reasons,
7865 usually pertaining to setjmp. */
7866 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7867 return 1;
7869 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7870 the frame pointer by default. Turn it back on now if we've not
7871 got a leaf function. */
7872 if (TARGET_OMIT_LEAF_FRAME_POINTER
7873 && (!current_function_is_leaf
7874 || ix86_current_function_calls_tls_descriptor))
7875 return 1;
7883 /* Record that the current function accesses previous call frames. */
7886 ix86_setup_frame_addresses (void)
7888 cfun->machine->accesses_prev_frame = 1;
7891 #ifndef USE_HIDDEN_LINKONCE
7892 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7893 # define USE_HIDDEN_LINKONCE 1
7895 # define USE_HIDDEN_LINKONCE 0
7899 static int pic_labels_used;
7901 /* Fills in the label name that should be used for a pc thunk for
7902 the given register. */
7905 get_pc_thunk_name (char name[32], unsigned int regno)
7907 gcc_assert (!TARGET_64BIT);
7909 if (USE_HIDDEN_LINKONCE)
7910 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7912 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7916 /* This function generates code for -fpic that loads %ebx with
7917 the return address of the caller and then returns. */
7920 ix86_code_end (void)
7925 for (regno = 0; regno < 8; ++regno)
7930 if (! ((pic_labels_used >> regno) & 1))
7933 get_pc_thunk_name (name, regno);
7935 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7936 get_identifier (name),
7937 build_function_type (void_type_node, void_list_node));
7938 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7939 NULL_TREE, void_type_node);
7940 TREE_PUBLIC (decl) = 1;
7941 TREE_STATIC (decl) = 1;
7946 switch_to_section (darwin_sections[text_coal_section]);
7947 fputs ("\t.weak_definition\t", asm_out_file);
7948 assemble_name (asm_out_file, name);
7949 fputs ("\n\t.private_extern\t", asm_out_file);
7950 assemble_name (asm_out_file, name);
7951 putc ('\n', asm_out_file);
7952 ASM_OUTPUT_LABEL (asm_out_file, name);
7953 DECL_WEAK (decl) = 1;
7957 if (USE_HIDDEN_LINKONCE)
7959 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7961 targetm.asm_out.unique_section (decl, 0);
7962 switch_to_section (get_named_section (decl, NULL, 0));
7964 targetm.asm_out.globalize_label (asm_out_file, name);
7965 fputs ("\t.hidden\t", asm_out_file);
7966 assemble_name (asm_out_file, name);
7967 putc ('\n', asm_out_file);
7968 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7972 switch_to_section (text_section);
7973 ASM_OUTPUT_LABEL (asm_out_file, name);
7976 DECL_INITIAL (decl) = make_node (BLOCK);
7977 current_function_decl = decl;
7978 init_function_start (decl);
7979 first_function_block_is_cold = false;
7980 /* Make sure unwind info is emitted for the thunk if needed. */
7981 final_start_function (emit_barrier (), asm_out_file, 1);
7983 xops[0] = gen_rtx_REG (Pmode, regno);
7984 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7985 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7986 output_asm_insn ("ret", xops);
7987 final_end_function ();
7988 init_insn_lengths ();
7989 free_after_compilation (cfun);
7991 current_function_decl = NULL;
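/* For reference, the thunk emitted above looks roughly like this
   (an assumed rendering for the %ebx flavor with USE_HIDDEN_LINKONCE):

     __i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret

   A caller then materializes its PIC register with

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   as arranged by output_set_got below.  */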
7995 /* Emit code for the SET_GOT patterns. */
7998 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8004 if (TARGET_VXWORKS_RTP && flag_pic)
8006 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8007 xops[2] = gen_rtx_MEM (Pmode,
8008 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8009 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8011 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8012 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8013 an unadorned address. */
8014 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8015 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8016 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8020 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8022 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8024 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8027 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8030 output_asm_insn ("call\t%a2", xops);
8031 #ifdef DWARF2_UNWIND_INFO
8032 /* The call to next label acts as a push. */
8033 if (dwarf2out_do_frame ())
8037 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8038 gen_rtx_PLUS (Pmode,
8041 RTX_FRAME_RELATED_P (insn) = 1;
8042 dwarf2out_frame_debug (insn, true);
8049 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8050 is what will be referenced by the Mach-O PIC subsystem. */
8052 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8055 targetm.asm_out.internal_label (asm_out_file, "L",
8056 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8060 output_asm_insn ("pop%z0\t%0", xops);
8061 #ifdef DWARF2_UNWIND_INFO
8062 /* Unlike the call above, which only acts as a push, this is a real
8063 pop; it clobbers dest but doesn't restore it for unwind info purposes. */
8064 if (dwarf2out_do_frame ())
8068 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8069 dwarf2out_frame_debug (insn, true);
8070 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8071 gen_rtx_PLUS (Pmode,
8074 RTX_FRAME_RELATED_P (insn) = 1;
8075 dwarf2out_frame_debug (insn, true);
8084 get_pc_thunk_name (name, REGNO (dest));
8085 pic_labels_used |= 1 << REGNO (dest);
8087 #ifdef DWARF2_UNWIND_INFO
8088 /* Ensure all queued register saves are flushed before the
8089 call. */
8090 if (dwarf2out_do_frame ())
8094 insn = emit_barrier ();
8096 dwarf2out_frame_debug (insn, false);
8099 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8100 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8101 output_asm_insn ("call\t%X2", xops);
8102 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8103 is what will be referenced by the Mach-O PIC subsystem. */
8106 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8108 targetm.asm_out.internal_label (asm_out_file, "L",
8109 CODE_LABEL_NUMBER (label));
8116 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8117 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8119 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
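/* As an illustrative sketch (not emitted verbatim), the classic
   sequence produced above for the non-deep-branch-prediction PIC
   case looks like:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   the call pushes the pc, the pop recovers it into the PIC register
   (%ebx is only an example), and the add rebases it to the GOT. */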
8124 /* Generate a "push" pattern for input ARG. */
8129 if (ix86_cfa_state->reg == stack_pointer_rtx)
8130 ix86_cfa_state->offset += UNITS_PER_WORD;
8132 return gen_rtx_SET (VOIDmode,
8134 gen_rtx_PRE_DEC (Pmode,
8135 stack_pointer_rtx)),
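/* In RTL terms, the pattern built above is roughly (a sketch with
   modes omitted):

     (set (mem (pre_dec (reg sp))) (arg))

   i.e. a push is represented as a store through a pre-decremented
   stack pointer. */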
8139 /* Return >= 0 if there is an unused call-clobbered register available
8140 for the entire function. */
8143 ix86_select_alt_pic_regnum (void)
8145 if (current_function_is_leaf && !crtl->profile
8146 && !ix86_current_function_calls_tls_descriptor)
8149 /* Can't use the same register for both PIC and DRAP. */
8151 drap = REGNO (crtl->drap_reg);
8154 for (i = 2; i >= 0; --i)
8155 if (i != drap && !df_regs_ever_live_p (i))
8159 return INVALID_REGNUM;
8162 /* Return 1 if we need to save REGNO. */
8164 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8166 if (pic_offset_table_rtx
8167 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8168 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8170 || crtl->calls_eh_return
8171 || crtl->uses_const_pool))
8173 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8178 if (crtl->calls_eh_return && maybe_eh_return)
8183 unsigned test = EH_RETURN_DATA_REGNO (i);
8184 if (test == INVALID_REGNUM)
8191 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8194 return (df_regs_ever_live_p (regno)
8195 && !call_used_regs[regno]
8196 && !fixed_regs[regno]
8197 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8200 /* Return the number of saved general purpose registers. */
8203 ix86_nsaved_regs (void)
8208 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8209 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8214 /* Return the number of saved SSE registers. */
8217 ix86_nsaved_sseregs (void)
8222 if (ix86_cfun_abi () != MS_ABI)
8224 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8225 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8230 /* Given FROM and TO register numbers, say whether this elimination is
8231 allowed. If stack alignment is needed, we can only replace argument
8232 pointer with hard frame pointer, or replace frame pointer with stack
8233 pointer. Otherwise, frame pointer elimination is automatically
8234 handled and all other eliminations are valid. */
8237 ix86_can_eliminate (const int from, const int to)
8239 if (stack_realign_fp)
8240 return ((from == ARG_POINTER_REGNUM
8241 && to == HARD_FRAME_POINTER_REGNUM)
8242 || (from == FRAME_POINTER_REGNUM
8243 && to == STACK_POINTER_REGNUM));
8245 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8248 /* Return the offset between two registers, one to be eliminated, and the other
8249 its replacement, at the start of a routine. */
8252 ix86_initial_elimination_offset (int from, int to)
8254 struct ix86_frame frame;
8255 ix86_compute_frame_layout (&frame);
8257 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8258 return frame.hard_frame_pointer_offset;
8259 else if (from == FRAME_POINTER_REGNUM
8260 && to == HARD_FRAME_POINTER_REGNUM)
8261 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8264 gcc_assert (to == STACK_POINTER_REGNUM);
8266 if (from == ARG_POINTER_REGNUM)
8267 return frame.stack_pointer_offset;
8269 gcc_assert (from == FRAME_POINTER_REGNUM);
8270 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8274 /* In a dynamically-aligned function, we can't know the offset from
8275 stack pointer to frame pointer, so we must ensure that setjmp
8276 eliminates fp against the hard fp (%ebp) rather than trying to
8277 index from %esp up to the top of the frame across a gap that is
8278 of unknown (at compile-time) size. */
8280 ix86_builtin_setjmp_frame_value (void)
8282 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8285 /* Fill the ix86_frame structure with information about the frame of the currently compiled function. */
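/* A rough picture of the layout computed below, derived from the
   offset arithmetic in this function (a sketch; the stack grows
   downward and entries marked "optional" may be absent):

	return address
	pushed static chain	(optional)
	saved frame pointer	(optional)	<- hard_frame_pointer_offset
	GP register save area
	padding0, SSE register save area
	va_arg register save area (optional)
	padding1
	local variables				<- frame_pointer_offset
	outgoing arguments, padding2		<- stack_pointer_offset  */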
8288 ix86_compute_frame_layout (struct ix86_frame *frame)
8290 unsigned int stack_alignment_needed;
8291 HOST_WIDE_INT offset;
8292 unsigned int preferred_alignment;
8293 HOST_WIDE_INT size = get_frame_size ();
8295 frame->nregs = ix86_nsaved_regs ();
8296 frame->nsseregs = ix86_nsaved_sseregs ();
8298 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8299 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8301 /* The MS ABI seems to require the stack alignment to always be 16, except
8302 for function prologues and leaf functions. */
8303 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8304 && (!current_function_is_leaf || cfun->calls_alloca != 0
8305 || ix86_current_function_calls_tls_descriptor))
8307 preferred_alignment = 16;
8308 stack_alignment_needed = 16;
8309 crtl->preferred_stack_boundary = 128;
8310 crtl->stack_alignment_needed = 128;
8313 gcc_assert (!size || stack_alignment_needed);
8314 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8315 gcc_assert (preferred_alignment <= stack_alignment_needed);
8317 /* During a reload iteration the number of registers saved can change.
8318 Recompute the value as needed. Do not recompute when the number of registers
8319 didn't change, as reload makes multiple calls to this function and does not
8320 expect the decision to change within a single iteration. */
8321 if (!optimize_function_for_size_p (cfun)
8322 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8324 int count = frame->nregs;
8325 struct cgraph_node *node = cgraph_node (current_function_decl);
8327 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8328 /* The fast prologue uses moves instead of pushes to save registers. This
8329 is significantly longer, but also executes faster, as modern hardware
8330 can execute the moves in parallel but can't do that for push/pop.
8332 Be careful about choosing which prologue to emit: when the function takes
8333 many instructions to execute, we may use the slow version, as well as
8334 when the function is known to be outside a hot spot (this is known with
8335 feedback only). Weight the size of the function by the number of registers
8336 to save, as it is cheap to use one or two push instructions but very
8337 slow to use many of them. */
8339 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8340 if (node->frequency < NODE_FREQUENCY_NORMAL
8341 || (flag_branch_probabilities
8342 && node->frequency < NODE_FREQUENCY_HOT))
8343 cfun->machine->use_fast_prologue_epilogue = false;
8345 cfun->machine->use_fast_prologue_epilogue
8346 = !expensive_function_p (count);
8348 if (TARGET_PROLOGUE_USING_MOVE
8349 && cfun->machine->use_fast_prologue_epilogue)
8350 frame->save_regs_using_mov = true;
8352 frame->save_regs_using_mov = false;
8354 /* If static stack checking is enabled and done with probes, the registers
8355 need to be saved before allocating the frame. */
8356 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8357 frame->save_regs_using_mov = false;
8359 /* Skip return address. */
8360 offset = UNITS_PER_WORD;
8362 /* Skip pushed static chain. */
8363 if (ix86_static_chain_on_stack)
8364 offset += UNITS_PER_WORD;
8366 /* Skip saved base pointer. */
8367 if (frame_pointer_needed)
8368 offset += UNITS_PER_WORD;
8370 frame->hard_frame_pointer_offset = offset;
8372 /* Align the offset, because the realigned frame starts from here. */
8374 if (stack_realign_fp)
8375 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8377 /* Register save area */
8378 offset += frame->nregs * UNITS_PER_WORD;
8380 /* Align SSE reg save area. */
8381 if (frame->nsseregs)
8382 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8384 frame->padding0 = 0;
8386 /* SSE register save area. */
8387 offset += frame->padding0 + frame->nsseregs * 16;
8390 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8391 offset += frame->va_arg_size;
8393 /* Align start of frame for local function. */
8394 frame->padding1 = ((offset + stack_alignment_needed - 1)
8395 & -stack_alignment_needed) - offset;
8397 offset += frame->padding1;
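/* For example, assuming offset == 44 and stack_alignment_needed == 16:
   ((44 + 16 - 1) & -16) - 44 == 48 - 44, i.e. padding1 is 4 bytes. */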
8399 /* Frame pointer points here. */
8400 frame->frame_pointer_offset = offset;
8404 /* Add the outgoing arguments area. This can be skipped if we eliminated
8405 all the function calls as dead code.
8406 Skipping is however impossible when the function calls alloca: the alloca
8407 expander assumes that the last crtl->outgoing_args_size
8408 bytes of the stack frame are unused. */
8409 if (ACCUMULATE_OUTGOING_ARGS
8410 && (!current_function_is_leaf || cfun->calls_alloca
8411 || ix86_current_function_calls_tls_descriptor))
8413 offset += crtl->outgoing_args_size;
8414 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8417 frame->outgoing_arguments_size = 0;
8419 /* Align the stack boundary. Only needed if we're calling another function or using alloca. */
8421 if (!current_function_is_leaf || cfun->calls_alloca
8422 || ix86_current_function_calls_tls_descriptor)
8423 frame->padding2 = ((offset + preferred_alignment - 1)
8424 & -preferred_alignment) - offset;
8426 frame->padding2 = 0;
8428 offset += frame->padding2;
8430 /* We've reached end of stack frame. */
8431 frame->stack_pointer_offset = offset;
8433 /* Size prologue needs to allocate. */
8434 frame->to_allocate =
8435 (size + frame->padding1 + frame->padding2
8436 + frame->outgoing_arguments_size + frame->va_arg_size);
8438 if ((!frame->to_allocate && frame->nregs <= 1)
8439 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8440 frame->save_regs_using_mov = false;
8442 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8443 && current_function_sp_is_unchanging
8444 && current_function_is_leaf
8445 && !ix86_current_function_calls_tls_descriptor)
8447 frame->red_zone_size = frame->to_allocate;
8448 if (frame->save_regs_using_mov)
8449 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8450 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8451 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8454 frame->red_zone_size = 0;
8455 frame->to_allocate -= frame->red_zone_size;
8456 frame->stack_pointer_offset -= frame->red_zone_size;
8459 /* Emit code to save registers in the prologue. */
8462 ix86_emit_save_regs (void)
8467 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8468 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8470 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8471 RTX_FRAME_RELATED_P (insn) = 1;
8475 /* Emit code to save registers using MOV insns. The first register
8476 is stored at POINTER + OFFSET. */
8478 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8483 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8484 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8486 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8488 gen_rtx_REG (Pmode, regno));
8489 RTX_FRAME_RELATED_P (insn) = 1;
8490 offset += UNITS_PER_WORD;
8494 /* Emit code to save SSE registers using MOV insns. The first register
8495 is stored at POINTER + OFFSET. */
8497 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8504 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8506 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8507 set_mem_align (mem, 128);
8508 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8509 RTX_FRAME_RELATED_P (insn) = 1;
8514 static GTY(()) rtx queued_cfa_restores;
8516 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8517 manipulation insn. Don't add the note if the previously
8518 saved value will be left untouched within the stack red zone till return,
8519 as unwinders can find the same value in the register and on the stack. */
8523 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8526 && !TARGET_64BIT_MS_ABI
8527 && red_offset + RED_ZONE_SIZE >= 0
8528 && crtl->args.pops_args < 65536)
8533 add_reg_note (insn, REG_CFA_RESTORE, reg);
8534 RTX_FRAME_RELATED_P (insn) = 1;
8538 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8541 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8544 ix86_add_queued_cfa_restore_notes (rtx insn)
8547 if (!queued_cfa_restores)
8549 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8551 XEXP (last, 1) = REG_NOTES (insn);
8552 REG_NOTES (insn) = queued_cfa_restores;
8553 queued_cfa_restores = NULL_RTX;
8554 RTX_FRAME_RELATED_P (insn) = 1;
8557 /* Expand prologue or epilogue stack adjustment.
8558 The pattern exists to put a dependency on all ebp-based memory accesses.
8559 STYLE should be negative if instructions should be marked as frame related,
8560 zero if the %r11 register is live and cannot be freely used, and positive otherwise. */
8564 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8565 int style, bool set_cfa)
8570 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8571 else if (x86_64_immediate_operand (offset, DImode))
8572 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8576 /* r11 is used by indirect sibcall return as well, set before the
8577 epilogue and used after the epilogue. */
8579 tmp = gen_rtx_REG (DImode, R11_REG);
8582 gcc_assert (src != hard_frame_pointer_rtx
8583 && dest != hard_frame_pointer_rtx);
8584 tmp = hard_frame_pointer_rtx;
8586 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8588 RTX_FRAME_RELATED_P (insn) = 1;
8589 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8594 ix86_add_queued_cfa_restore_notes (insn);
8600 gcc_assert (ix86_cfa_state->reg == src);
8601 ix86_cfa_state->offset += INTVAL (offset);
8602 ix86_cfa_state->reg = dest;
8604 r = gen_rtx_PLUS (Pmode, src, offset);
8605 r = gen_rtx_SET (VOIDmode, dest, r);
8606 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8607 RTX_FRAME_RELATED_P (insn) = 1;
8610 RTX_FRAME_RELATED_P (insn) = 1;
8613 /* Find an available register to be used as the dynamic realign argument
8614 pointer register. Such a register will be written in the prologue and
8615 used at the beginning of the body, so it must not be
8616 1. a parameter passing register.
8618 We reuse the static-chain register if it is available. Otherwise, we
8619 use DI for i386 and R13 for x86-64. We chose R13 since it has a
longer instruction encoding.
8622 Return: the regno of the chosen register. */
8625 find_drap_reg (void)
8627 tree decl = cfun->decl;
8631 /* Use R13 for a nested function or a function that needs a static chain.
8632 Since a function with a tail call may use any caller-saved
8633 register in the epilogue, DRAP must not use a caller-saved
8634 register in that case. */
8635 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8642 /* Use DI for a nested function or a function that needs a static chain.
8643 Since a function with a tail call may use any caller-saved
8644 register in the epilogue, DRAP must not use a caller-saved
8645 register in that case. */
8646 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8649 /* Reuse the static chain register if it isn't used for parameter passing. */
8651 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8652 && !lookup_attribute ("fastcall",
8653 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8654 && !lookup_attribute ("thiscall",
8655 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8662 /* Return minimum incoming stack alignment. */
8665 ix86_minimum_incoming_stack_boundary (bool sibcall)
8667 unsigned int incoming_stack_boundary;
8669 /* Prefer the one specified at command line. */
8670 if (ix86_user_incoming_stack_boundary)
8671 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8672 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8673 if -mstackrealign is used, this isn't for the sibcall check, and the
8674 estimated stack alignment is 128 bits. */
8677 && ix86_force_align_arg_pointer
8678 && crtl->stack_alignment_estimated == 128)
8679 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8681 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8683 /* Incoming stack alignment can be changed on individual functions
8684 via force_align_arg_pointer attribute. We use the smallest
8685 incoming stack boundary. */
8686 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8687 && lookup_attribute (ix86_force_align_arg_pointer_string,
8688 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8689 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8691 /* The incoming stack frame has to be aligned at least at
8692 parm_stack_boundary. */
8693 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8694 incoming_stack_boundary = crtl->parm_stack_boundary;
8696 /* The stack at the entry of main is aligned by the runtime. We use the
8697 smallest incoming stack boundary. */
8698 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8699 && DECL_NAME (current_function_decl)
8700 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8701 && DECL_FILE_SCOPE_P (current_function_decl))
8702 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8704 return incoming_stack_boundary;
8707 /* Update incoming stack boundary and estimated stack alignment. */
8710 ix86_update_stack_boundary (void)
8712 ix86_incoming_stack_boundary
8713 = ix86_minimum_incoming_stack_boundary (false);
8715 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
8719 && crtl->stack_alignment_estimated < 128)
8720 crtl->stack_alignment_estimated = 128;
8723 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8724 needed or an rtx for DRAP otherwise. */
8727 ix86_get_drap_rtx (void)
8729 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8730 crtl->need_drap = true;
8732 if (stack_realign_drap)
8734 /* Assign DRAP to vDRAP and return vDRAP. */
8735 unsigned int regno = find_drap_reg ();
8740 arg_ptr = gen_rtx_REG (Pmode, regno);
8741 crtl->drap_reg = arg_ptr;
8744 drap_vreg = copy_to_reg (arg_ptr);
8748 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8751 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8752 RTX_FRAME_RELATED_P (insn) = 1;
8760 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8763 ix86_internal_arg_pointer (void)
8765 return virtual_incoming_args_rtx;
8768 struct scratch_reg {
8773 /* Return a short-lived scratch register for use on function entry.
8774 In 32-bit mode, it is valid only after the registers are saved
8775 in the prologue. This register must be released by means of
8776 release_scratch_register_on_entry once it is dead. */
8779 get_scratch_register_on_entry (struct scratch_reg *sr)
8787 /* We always use R11 in 64-bit mode. */
8792 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8794 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8795 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8796 int regparm = ix86_function_regparm (fntype, decl);
8798 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8800 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8801 for the static chain register. */
8802 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8803 && drap_regno != AX_REG)
8805 else if (regparm < 2 && drap_regno != DX_REG)
8807 /* ecx is the static chain register. */
8808 else if (regparm < 3 && !fastcall_p && !static_chain_p
8809 && drap_regno != CX_REG)
8811 else if (ix86_save_reg (BX_REG, true))
8813 /* esi is the static chain register. */
8814 else if (!(regparm == 3 && static_chain_p)
8815 && ix86_save_reg (SI_REG, true))
8817 else if (ix86_save_reg (DI_REG, true))
8821 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8826 sr->reg = gen_rtx_REG (Pmode, regno);
8829 rtx insn = emit_insn (gen_push (sr->reg));
8830 RTX_FRAME_RELATED_P (insn) = 1;
8834 /* Release a scratch register obtained from the preceding function. */
8837 release_scratch_register_on_entry (struct scratch_reg *sr)
8841 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
8843 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8844 RTX_FRAME_RELATED_P (insn) = 1;
8845 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
8846 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
8847 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
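/* Typical usage of the two helpers above, a sketch of how the stack
   probing routines below employ them:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that use sr.reg ...
     release_scratch_register_on_entry (&sr);  */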
8851 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
8853 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
8856 ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
8858 /* We skip the probe for the first interval + a small dope of 4 words and
8859 probe that many bytes past the specified size to maintain a protection
8860 area at the bottom of the stack. */
8861 const int dope = 4 * UNITS_PER_WORD;
8862 rtx size_rtx = GEN_INT (size);
8864 /* See if we have a constant small number of probes to generate. If so,
8865 that's the easy case. The run-time loop is made up of 11 insns in the
8866 generic case while the compile-time loop is made up of 3+2*(n-1) insns
8867 for n # of intervals. */
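/* E.g. for size == 3 * PROBE_INTERVAL the unrolled variant costs
   3 + 2*(3-1) == 7 insns against 11 for the run-time loop; at 5
   intervals the two are equal, which is why sizes of up to 5
   intervals are unrolled below. */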
8868 if (size <= 5 * PROBE_INTERVAL)
8870 HOST_WIDE_INT i, adjust;
8871 bool first_probe = true;
8873 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
8874 values of N from 1 until it exceeds SIZE. If only one probe is
8875 needed, this will not generate any code. Then adjust and probe
8876 to PROBE_INTERVAL + SIZE. */
8877 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
8881 adjust = 2 * PROBE_INTERVAL + dope;
8882 first_probe = false;
8885 adjust = PROBE_INTERVAL;
8887 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8888 plus_constant (stack_pointer_rtx, -adjust)));
8889 emit_stack_probe (stack_pointer_rtx);
8893 adjust = size + PROBE_INTERVAL + dope;
8895 adjust = size + PROBE_INTERVAL - i;
8897 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8898 plus_constant (stack_pointer_rtx, -adjust)));
8899 emit_stack_probe (stack_pointer_rtx);
8901 /* Adjust back to account for the additional first interval. */
8902 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8903 plus_constant (stack_pointer_rtx,
8904 PROBE_INTERVAL + dope)));
8907 /* Otherwise, do the same as above, but in a loop. Note that we must be
8908 extra careful with variables wrapping around because we might be at
8909 the very top (or the very bottom) of the address space and we have
8910 to be able to handle this case properly; in particular, we use an
8911 equality test for the loop condition. */
8914 HOST_WIDE_INT rounded_size;
8915 struct scratch_reg sr;
8917 get_scratch_register_on_entry (&sr);
8920 /* Step 1: round SIZE to the previous multiple of the interval. */
8922 rounded_size = size & -PROBE_INTERVAL;
8925 /* Step 2: compute initial and final value of the loop counter. */
8927 /* SP = SP_0 + PROBE_INTERVAL. */
8928 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8929 plus_constant (stack_pointer_rtx,
8930 - (PROBE_INTERVAL + dope))));
8932 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
8933 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8934 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
8935 gen_rtx_PLUS (Pmode, sr.reg,
8936 stack_pointer_rtx)));
8941 while (SP != LAST_ADDR)
8943 SP = SP + PROBE_INTERVAL
8947 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
8948 values of N from 1 until it is equal to ROUNDED_SIZE. */
8950 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
8953 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
8954 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
8956 if (size != rounded_size)
8958 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8959 plus_constant (stack_pointer_rtx,
8960 rounded_size - size)));
8961 emit_stack_probe (stack_pointer_rtx);
8964 /* Adjust back to account for the additional first interval. */
8965 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8966 plus_constant (stack_pointer_rtx,
8967 PROBE_INTERVAL + dope)));
8969 release_scratch_register_on_entry (&sr);
8972 gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
8974 /* Make sure nothing is scheduled before we are done. */
8975 emit_insn (gen_blockage ());
8978 /* Adjust the stack pointer up to REG while probing it. */
8981 output_adjust_stack_and_probe (rtx reg)
8983 static int labelno = 0;
8984 char loop_lab[32], end_lab[32];
8987 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
8988 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
8990 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
8992 /* Jump to END_LAB if SP == LAST_ADDR. */
8993 xops[0] = stack_pointer_rtx;
8995 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
8996 fputs ("\tje\t", asm_out_file);
8997 assemble_name_raw (asm_out_file, end_lab);
8998 fputc ('\n', asm_out_file);
9000 /* SP = SP + PROBE_INTERVAL. */
9001 xops[1] = GEN_INT (PROBE_INTERVAL);
9002 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9005 xops[1] = const0_rtx;
9006 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9008 fprintf (asm_out_file, "\tjmp\t");
9009 assemble_name_raw (asm_out_file, loop_lab);
9010 fputc ('\n', asm_out_file);
9012 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
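/* The loop emitted above looks roughly as follows (32-bit AT&T form;
   %ecx stands for the LAST_ADDR scratch register and the label names
   are illustrative):

   .LPSRL0:	cmpl	%ecx, %esp
		je	.LPSRE0
		subl	$PROBE_INTERVAL, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
   .LPSRE0:  */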
9017 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9018 inclusive. These are offsets from the current stack pointer. */
9021 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9023 /* See if we have a constant small number of probes to generate. If so,
9024 that's the easy case. The run-time loop is made up of 7 insns in the
9025 generic case while the compile-time loop is made up of n insns for n #
of intervals. */
9027 if (size <= 7 * PROBE_INTERVAL)
9031 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9032 it exceeds SIZE. If only one probe is needed, this will not
9033 generate any code. Then probe at FIRST + SIZE. */
9034 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9035 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9037 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9040 /* Otherwise, do the same as above, but in a loop. Note that we must be
9041 extra careful with variables wrapping around because we might be at
9042 the very top (or the very bottom) of the address space and we have
9043 to be able to handle this case properly; in particular, we use an
9044 equality test for the loop condition. */
9047 HOST_WIDE_INT rounded_size, last;
9048 struct scratch_reg sr;
9050 get_scratch_register_on_entry (&sr);
9053 /* Step 1: round SIZE to the previous multiple of the interval. */
9055 rounded_size = size & -PROBE_INTERVAL;
9058 /* Step 2: compute initial and final value of the loop counter. */
9060 /* TEST_OFFSET = FIRST. */
9061 emit_move_insn (sr.reg, GEN_INT (-first));
9063 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9064 last = first + rounded_size;
9069 while (TEST_ADDR != LAST_ADDR)
9071 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9075 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9076 until it is equal to ROUNDED_SIZE. */
9078 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9081 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9082 that SIZE is equal to ROUNDED_SIZE. */
9084 if (size != rounded_size)
9085 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9088 rounded_size - size));
9090 release_scratch_register_on_entry (&sr);
9093 /* Make sure nothing is scheduled before we are done. */
9094 emit_insn (gen_blockage ());
9097 /* Probe a range of stack addresses from REG to END, inclusive. These are
9098 offsets from the current stack pointer. */
9101 output_probe_stack_range (rtx reg, rtx end)
9103 static int labelno = 0;
9104 char loop_lab[32], end_lab[32];
9107 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9108 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9110 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9112 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9115 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9116 fputs ("\tje\t", asm_out_file);
9117 assemble_name_raw (asm_out_file, end_lab);
9118 fputc ('\n', asm_out_file);
9120 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9121 xops[1] = GEN_INT (PROBE_INTERVAL);
9122 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9124 /* Probe at TEST_ADDR. */
9125 xops[0] = stack_pointer_rtx;
9127 xops[2] = const0_rtx;
9128 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9130 fprintf (asm_out_file, "\tjmp\t");
9131 assemble_name_raw (asm_out_file, loop_lab);
9132 fputc ('\n', asm_out_file);
9134 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
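/* Similarly, the range-probing loop above is roughly (32-bit AT&T
   form; %eax, standing for the scratch register that holds the
   negated test offset, and the final-offset operand are illustrative):

   .LPSRL1:	cmpl	$LAST, %eax
		je	.LPSRE1
		subl	$PROBE_INTERVAL, %eax
		orl	$0, (%esp,%eax)
		jmp	.LPSRL1
   .LPSRE1:  */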
9139 /* Finalize the stack_realign_needed flag, which will guide the prologue/epilogue
9140 so that they are generated in the correct form. */
9142 ix86_finalize_stack_realign_flags (void)
9144 /* Check whether stack realignment is really needed after reload, and
9145 store the result in cfun. */
9146 unsigned int incoming_stack_boundary
9147 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9148 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9149 unsigned int stack_realign = (incoming_stack_boundary
9150 < (current_function_is_leaf
9151 ? crtl->max_used_stack_slot_alignment
9152 : crtl->stack_alignment_needed));
9154 if (crtl->stack_realign_finalized)
9156 /* After stack_realign_needed is finalized, we can no longer update it. */
9158 gcc_assert (crtl->stack_realign_needed == stack_realign);
9162 crtl->stack_realign_needed = stack_realign;
9163 crtl->stack_realign_finalized = true;
9167 /* Expand the prologue into a bunch of separate insns. */
9170 ix86_expand_prologue (void)
9174 struct ix86_frame frame;
9175 HOST_WIDE_INT allocate;
9176 int gen_frame_pointer = frame_pointer_needed;
9178 ix86_finalize_stack_realign_flags ();
9180 /* DRAP should not coexist with stack_realign_fp */
9181 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9183 /* Initialize CFA state for before the prologue. */
9184 ix86_cfa_state->reg = stack_pointer_rtx;
9185 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
9187 ix86_compute_frame_layout (&frame);
9189 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9193 /* Make sure the function starts with
9194 8b ff movl.s %edi,%edi (emitted by ix86_asm_output_function_label)
9196 8b ec movl.s %esp,%ebp
9198 This matches the hookable function prologue in Win32 API
9199 functions in Microsoft Windows XP Service Pack 2 and newer.
9200 Wine uses this to enable Windows apps to hook the Win32 API
9201 functions provided by Wine. */
9202 push = emit_insn (gen_push (hard_frame_pointer_rtx));
9203 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
9204 stack_pointer_rtx));
9206 if (frame_pointer_needed && !(crtl->drap_reg
9207 && crtl->stack_realign_needed))
9209 /* The push %ebp and movl.s %esp, %ebp already set up
9210 the frame pointer. No need to do this again. */
9211 gen_frame_pointer = 0;
9212 RTX_FRAME_RELATED_P (push) = 1;
9213 RTX_FRAME_RELATED_P (mov) = 1;
9214 if (ix86_cfa_state->reg == stack_pointer_rtx)
9215 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9218 /* If the frame pointer is not needed, pop %ebp again. This
9219 could be optimized for cases where ebp needs to be backed up
9220 for some other reason. If stack realignment is needed, pop
9221 the base pointer again, align the stack, and later regenerate
9222 the frame pointer setup. The frame pointer generated by the
9223 hook prologue is not aligned, so it can't be used. */
9224 insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9227 /* The first insn of a function that accepts its static chain on the
9228 stack is to push the register that would be filled in by a direct
9229 call. This insn will be skipped by the trampoline. */
9230 if (ix86_static_chain_on_stack)
9234 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9235 emit_insn (gen_blockage ());
9237 /* We don't want to interpret this push insn as a register save,
9238 only as a stack adjustment. The real copy of the register as
9239 a save will be done later, if needed. */
9240 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9241 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9242 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9243 RTX_FRAME_RELATED_P (insn) = 1;
9246 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9247 DRAP is needed and stack realignment is really needed after reload. */
9248 if (crtl->drap_reg && crtl->stack_realign_needed)
9251 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9252 int param_ptr_offset = UNITS_PER_WORD;
9254 if (ix86_static_chain_on_stack)
9255 param_ptr_offset += UNITS_PER_WORD;
9256 if (!call_used_regs[REGNO (crtl->drap_reg)])
9257 param_ptr_offset += UNITS_PER_WORD;
9259 gcc_assert (stack_realign_drap);
9261 /* Grab the argument pointer. */
9262 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
9265 /* Only need to push the parameter pointer reg if it is caller saved. */
9267 if (!call_used_regs[REGNO (crtl->drap_reg)])
9269 /* Push arg pointer reg */
9270 insn = emit_insn (gen_push (y));
9271 RTX_FRAME_RELATED_P (insn) = 1;
9274 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
9275 RTX_FRAME_RELATED_P (insn) = 1;
9276 ix86_cfa_state->reg = crtl->drap_reg;
9278 /* Align the stack. */
9279 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9281 GEN_INT (-align_bytes)));
9282 RTX_FRAME_RELATED_P (insn) = 1;
9284 /* Replicate the return address on the stack so that the return
9285 address can be reached via the (argp - 1) slot. This is needed
9286 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9287 expand_builtin_return_addr etc. */
9289 x = gen_frame_mem (Pmode,
9290 plus_constant (x, -UNITS_PER_WORD));
9291 insn = emit_insn (gen_push (x));
9292 RTX_FRAME_RELATED_P (insn) = 1;
9295 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9296 slower on all targets. Also sdb doesn't like it. */
9298 if (gen_frame_pointer)
9300 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9301 RTX_FRAME_RELATED_P (insn) = 1;
9303 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9304 RTX_FRAME_RELATED_P (insn) = 1;
9306 if (ix86_cfa_state->reg == stack_pointer_rtx)
9307 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9310 if (stack_realign_fp)
9312 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9313 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9315 /* Align the stack. */
9316 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9318 GEN_INT (-align_bytes)));
9319 RTX_FRAME_RELATED_P (insn) = 1;
9322 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
9324 if (!frame.save_regs_using_mov)
9325 ix86_emit_save_regs ();
9327 allocate += frame.nregs * UNITS_PER_WORD;
9329 /* The stack has already been decremented by the instruction calling us
9330 so we need to probe unconditionally to preserve the protection area. */
9331 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9333 /* We expect the registers to be saved when probes are used. */
9334 gcc_assert (!frame.save_regs_using_mov);
9336 if (STACK_CHECK_MOVING_SP)
9338 ix86_adjust_stack_and_probe (allocate);
9343 HOST_WIDE_INT size = allocate;
9345 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9346 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9348 if (TARGET_STACK_PROBE)
9349 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9351 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9355 /* When using the red zone we may start register saving before allocating
9356 the stack frame, saving one cycle of the prologue. However, avoid
9357 doing this if the stack has to be probed, since
9358 at least on x86_64 the stack probe can turn into a call that clobbers
9359 a red zone location. */
9360 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
9361 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
9362 ix86_emit_save_regs_using_mov ((frame_pointer_needed
9363 && !crtl->stack_realign_needed)
9364 ? hard_frame_pointer_rtx
9365 : stack_pointer_rtx,
9366 -frame.nregs * UNITS_PER_WORD);
9370 else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
9371 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9372 GEN_INT (-allocate), -1,
9373 ix86_cfa_state->reg == stack_pointer_rtx);
9376 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9380 if (cfun->machine->call_abi == MS_ABI)
9383 eax_live = ix86_eax_live_at_start_p ();
9387 emit_insn (gen_push (eax));
9388 allocate -= UNITS_PER_WORD;
9391 emit_move_insn (eax, GEN_INT (allocate));
9393 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9395 if (ix86_cfa_state->reg == stack_pointer_rtx)
9397 ix86_cfa_state->offset += allocate;
9398 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9399 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9400 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9401 RTX_FRAME_RELATED_P (insn) = 1;
9406 if (frame_pointer_needed)
9407 t = plus_constant (hard_frame_pointer_rtx,
9410 - frame.nregs * UNITS_PER_WORD);
9412 t = plus_constant (stack_pointer_rtx, allocate);
9413 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
9417 if (frame.save_regs_using_mov
9418 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
9419 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
9421 if (!frame_pointer_needed
9422 || !(frame.to_allocate + frame.padding0)
9423 || crtl->stack_realign_needed)
9424 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
9426 + frame.nsseregs * 16 + frame.padding0);
9428 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
9429 -frame.nregs * UNITS_PER_WORD);
9431 if (!frame_pointer_needed
9432 || !(frame.to_allocate + frame.padding0)
9433 || crtl->stack_realign_needed)
9434 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
9437 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
9438 - frame.nregs * UNITS_PER_WORD
9439 - frame.nsseregs * 16
9442 pic_reg_used = false;
9443 if (pic_offset_table_rtx
9444 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9447 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9449 if (alt_pic_reg_used != INVALID_REGNUM)
9450 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9452 pic_reg_used = true;
9459 if (ix86_cmodel == CM_LARGE_PIC)
9461 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9462 rtx label = gen_label_rtx ();
9464 LABEL_PRESERVE_P (label) = 1;
9465 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9466 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9467 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9468 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9469 pic_offset_table_rtx, tmp_reg));
9472 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9475 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9478 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
9479 when mcount needs it. A blockage to avoid call movement across the mcount
9480 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note. */
9482 if (crtl->profile && pic_reg_used)
9483 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9485 if (crtl->drap_reg && !crtl->stack_realign_needed)
9487 /* vDRAP is set up, but after reload it turns out stack realignment
9488 isn't necessary; here we emit prologue code to set up DRAP
9489 without the stack realignment adjustment. */
9491 int drap_bp_offset = UNITS_PER_WORD * 2;
9493 if (ix86_static_chain_on_stack)
9494 drap_bp_offset += UNITS_PER_WORD;
9495 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
9496 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
9499 /* Prevent instructions from being scheduled into register save push
9500 sequence when access to the redzone area is done through frame pointer.
9501 The offset between the frame pointer and the stack pointer is calculated
9502 relative to the value of the stack pointer at the end of the function
9503 prologue, and moving instructions that access redzone area via frame
9504 pointer inside push sequence violates this assumption. */
9505 if (frame_pointer_needed && frame.red_zone_size)
9506 emit_insn (gen_memory_blockage ());
9508 /* Emit cld instruction if stringops are used in the function. */
9509 if (TARGET_CLD && ix86_current_function_needs_cld)
9510 emit_insn (gen_cld ());
9513 /* Emit code to restore REG using a POP insn. */
9516 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
9518 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9520 if (ix86_cfa_state->reg == crtl->drap_reg
9521 && REGNO (reg) == REGNO (crtl->drap_reg))
9523 /* Previously we'd represented the CFA as an expression
9524 like *(%ebp - 8). We've just popped that value from
9525 the stack, which means we need to reset the CFA to
9526 the drap register. This will remain until we restore
9527 the stack pointer. */
9528 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9529 RTX_FRAME_RELATED_P (insn) = 1;
9533 if (ix86_cfa_state->reg == stack_pointer_rtx)
9535 ix86_cfa_state->offset -= UNITS_PER_WORD;
9536 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9537 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9538 RTX_FRAME_RELATED_P (insn) = 1;
9541 /* When the frame pointer is the CFA, and we pop it, we are
9542 swapping back to the stack pointer as the CFA. This happens
9543 for stack frames that don't allocate other data, so we assume
9544 the stack pointer is now pointing at the return address, i.e.
9545 the function entry state, which makes the offset be 1 word. */
9546 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
9547 && reg == hard_frame_pointer_rtx)
9549 ix86_cfa_state->reg = stack_pointer_rtx;
9550 ix86_cfa_state->offset -= UNITS_PER_WORD;
9552 add_reg_note (insn, REG_CFA_DEF_CFA,
9553 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9554 GEN_INT (ix86_cfa_state->offset)));
9555 RTX_FRAME_RELATED_P (insn) = 1;
9558 ix86_add_cfa_restore_note (insn, reg, red_offset);
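/* A concrete instance of the bookkeeping above: if the CFA is
   %esp + 8 and a register is popped, ix86_cfa_state->offset drops
   from 8 to 4, since the pop raised %esp by a word while the CFA
   address itself did not move. */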
9561 /* Emit code to restore saved registers using POP insns. */
9564 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
9568 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9569 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9571 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
9573 red_offset += UNITS_PER_WORD;
9577 /* Emit code and notes for the LEAVE instruction. */
9580 ix86_emit_leave (HOST_WIDE_INT red_offset)
9582 rtx insn = emit_insn (ix86_gen_leave ());
9584 ix86_add_queued_cfa_restore_notes (insn);
9586 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
9588 ix86_cfa_state->reg = stack_pointer_rtx;
9589 ix86_cfa_state->offset -= UNITS_PER_WORD;
9591 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9592 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
9593 RTX_FRAME_RELATED_P (insn) = 1;
9594 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
9598 /* Emit code to restore saved registers using MOV insns. First register
9599 is restored from POINTER + OFFSET. */
9601 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9602 HOST_WIDE_INT red_offset,
9603 int maybe_eh_return)
9606 rtx base_address = gen_rtx_MEM (Pmode, pointer);
9609 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9610 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9612 rtx reg = gen_rtx_REG (Pmode, regno);
9614 /* Ensure that adjust_address won't be forced to produce a pointer
9615 outside the range allowed by the x86-64 instruction set. */
9616 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9620 r11 = gen_rtx_REG (DImode, R11_REG);
9621 emit_move_insn (r11, GEN_INT (offset));
9622 emit_insn (gen_adddi3 (r11, r11, pointer));
9623 base_address = gen_rtx_MEM (Pmode, r11);
9626 insn = emit_move_insn (reg,
9627 adjust_address (base_address, Pmode, offset));
9628 offset += UNITS_PER_WORD;
9630 if (ix86_cfa_state->reg == crtl->drap_reg
9631 && regno == REGNO (crtl->drap_reg))
9633 /* Previously we'd represented the CFA as an expression
9634 like *(%ebp - 8). We've just popped that value from
9635 the stack, which means we need to reset the CFA to
9636 the drap register. This will remain until we restore
9637 the stack pointer. */
9638 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9639 RTX_FRAME_RELATED_P (insn) = 1;
9642 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9644 red_offset += UNITS_PER_WORD;
9648 /* Emit code to restore saved SSE registers using MOV insns. The first register
9649 is restored from POINTER + OFFSET. */
9651 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9652 HOST_WIDE_INT red_offset,
9653 int maybe_eh_return)
9656 rtx base_address = gen_rtx_MEM (TImode, pointer);
9659 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9660 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9662 rtx reg = gen_rtx_REG (TImode, regno);
9664 /* Ensure that adjust_address won't be forced to produce a pointer
9665 outside the range allowed by the x86-64 instruction set. */
9666 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9670 r11 = gen_rtx_REG (DImode, R11_REG);
9671 emit_move_insn (r11, GEN_INT (offset));
9672 emit_insn (gen_adddi3 (r11, r11, pointer));
9673 base_address = gen_rtx_MEM (TImode, r11);
9676 mem = adjust_address (base_address, TImode, offset);
9677 set_mem_align (mem, 128);
9678 emit_move_insn (reg, mem);
9681 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9687 /* Restore function stack, frame, and registers. */
9690 ix86_expand_epilogue (int style)
9693 struct ix86_frame frame;
9694 HOST_WIDE_INT offset, red_offset;
9695 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9698 ix86_finalize_stack_realign_flags ();
9700 /* When stack is realigned, SP must be valid. */
9701 sp_valid = (!frame_pointer_needed
9702 || current_function_sp_is_unchanging
9703 || stack_realign_fp);
9705 ix86_compute_frame_layout (&frame);
9707 /* See the comment about red zone and frame
9708 pointer usage in ix86_expand_prologue. */
9709 if (frame_pointer_needed && frame.red_zone_size)
9710 emit_insn (gen_memory_blockage ());
9712 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9713 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9715 /* Calculate start of saved registers relative to ebp. Special care
9716 must be taken for the normal return case of a function using
9717 eh_return: the eax and edx registers are marked as saved, but not
9718 restored along this path. */
9719 offset = frame.nregs;
9720 if (crtl->calls_eh_return && style != 2)
9722 offset *= -UNITS_PER_WORD;
9723 offset -= frame.nsseregs * 16 + frame.padding0;
9725 /* Calculate start of saved registers relative to esp on entry of the
9726 function. When realigning stack, this needs to be the most negative
9727 value possible at runtime. */
9728 red_offset = offset;
9730 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9732 else if (stack_realign_fp)
9733 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9735 if (ix86_static_chain_on_stack)
9736 red_offset -= UNITS_PER_WORD;
9737 if (frame_pointer_needed)
9738 red_offset -= UNITS_PER_WORD;
9740 /* If we're only restoring one register and sp is not valid then
9741 use a move instruction to restore the register, since it's
9742 less work than reloading sp and popping the register.
9744 The default code results in a stack adjustment using an add/lea instruction,
9745 while this code results in a LEAVE instruction (or discrete equivalent),
9746 so it is profitable in some other cases as well, especially when there
9747 are no registers to restore. We also use this code when TARGET_USE_LEAVE
9748 and there is exactly one register to pop. This heuristic may need some
9749 tuning in the future. */
9750 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9751 || (TARGET_EPILOGUE_USING_MOVE
9752 && cfun->machine->use_fast_prologue_epilogue
9753 && ((frame.nregs + frame.nsseregs) > 1
9754 || (frame.to_allocate + frame.padding0) != 0))
9755 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9756 && (frame.to_allocate + frame.padding0) != 0)
9757 || (frame_pointer_needed && TARGET_USE_LEAVE
9758 && cfun->machine->use_fast_prologue_epilogue
9759 && (frame.nregs + frame.nsseregs) == 1)
9760 || crtl->calls_eh_return)
9762 /* Restore registers. We can use ebp or esp to address the memory
9763 locations. If both are available, default to ebp, since offsets
9764 are known to be small. The only exception is esp pointing directly
9765 to the end of the block of saved registers, where we may simplify
the addressing mode.
9768 If we are realigning the stack with bp and sp, the register restores can't
9769 be addressed by bp; sp must be used instead. */
9771 if (!frame_pointer_needed
9772 || (sp_valid && !(frame.to_allocate + frame.padding0))
9773 || stack_realign_fp)
9775 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9776 frame.to_allocate, red_offset,
9778 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9780 + frame.nsseregs * 16
9783 + frame.nsseregs * 16
9784 + frame.padding0, style == 2);
9788 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9791 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9793 + frame.nsseregs * 16
9796 + frame.nsseregs * 16
9797 + frame.padding0, style == 2);
9800 red_offset -= offset;
9802 /* eh_return epilogues need %ecx added to the stack pointer. */
9805 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9807 /* Stack realignment doesn't work with eh_return. */
9808 gcc_assert (!crtl->stack_realign_needed);
9809 /* Neither do regparm nested functions. */
9810 gcc_assert (!ix86_static_chain_on_stack);
9812 if (frame_pointer_needed)
9814 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9815 tmp = plus_constant (tmp, UNITS_PER_WORD);
9816 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9818 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9819 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9821 /* Note that we use SA as a temporary CFA, as the return
9822 address is at the proper place relative to it. We
9823 pretend this happens at the FP restore insn because
9824 prior to this insn the FP would be stored at the wrong
9825 offset relative to SA, and after this insn we have no
9826 other reasonable register to use for the CFA. We don't
9827 bother resetting the CFA to the SP for the duration of the epilogue. */
9829 add_reg_note (tmp, REG_CFA_DEF_CFA,
9830 plus_constant (sa, UNITS_PER_WORD));
9831 ix86_add_queued_cfa_restore_notes (tmp);
9832 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9833 RTX_FRAME_RELATED_P (tmp) = 1;
9834 ix86_cfa_state->reg = sa;
9835 ix86_cfa_state->offset = UNITS_PER_WORD;
9837 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9838 const0_rtx, style, false);
9842 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9843 tmp = plus_constant (tmp, (frame.to_allocate
9844 + frame.nregs * UNITS_PER_WORD
9845 + frame.nsseregs * 16
9847 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9848 ix86_add_queued_cfa_restore_notes (tmp);
9850 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9851 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9853 ix86_cfa_state->offset = UNITS_PER_WORD;
9854 add_reg_note (tmp, REG_CFA_DEF_CFA,
9855 plus_constant (stack_pointer_rtx,
9857 RTX_FRAME_RELATED_P (tmp) = 1;
9861 else if (!frame_pointer_needed)
9862 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9863 GEN_INT (frame.to_allocate
9864 + frame.nregs * UNITS_PER_WORD
9865 + frame.nsseregs * 16
9867 style, !using_drap);
9868 /* If not an i386, mov & pop is faster than "leave". */
9869 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9870 || !cfun->machine->use_fast_prologue_epilogue)
9871 ix86_emit_leave (red_offset);
9874 pro_epilogue_adjust_stack (stack_pointer_rtx,
9875 hard_frame_pointer_rtx,
9876 const0_rtx, style, !using_drap);
9878 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9883 /* The first step is to deallocate the stack frame so that we can
restore the registers with post-increment operations.
9886 If we realign the stack with the frame pointer, then the stack pointer
9887 can't be recovered via lea $offset(%bp), %sp, because
9888 there is a padding area between bp and sp for the realignment;
9889 "add $to_allocate, %sp" must be used instead. */
9892 gcc_assert (frame_pointer_needed);
9893 gcc_assert (!stack_realign_fp);
9894 pro_epilogue_adjust_stack (stack_pointer_rtx,
9895 hard_frame_pointer_rtx,
9896 GEN_INT (offset), style, false);
9897 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9900 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9901 GEN_INT (frame.nsseregs * 16
9905 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9907 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9908 frame.to_allocate, red_offset,
9910 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9911 GEN_INT (frame.to_allocate
9912 + frame.nsseregs * 16
9913 + frame.padding0), style,
9914 !using_drap && !frame_pointer_needed);
9917 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9919 red_offset -= offset;
9921 if (frame_pointer_needed)
9923 /* Leave results in shorter dependency chains on CPUs that are
9924 able to grok it fast. */
9925 if (TARGET_USE_LEAVE)
9926 ix86_emit_leave (red_offset);
9929 /* If stack realignment really happened, recovering the stack
9930 pointer from the hard frame pointer is a must if not using leave. */
9932 if (stack_realign_fp)
9933 pro_epilogue_adjust_stack (stack_pointer_rtx,
9934 hard_frame_pointer_rtx,
9935 const0_rtx, style, !using_drap);
9936 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9944 int param_ptr_offset = UNITS_PER_WORD;
9947 gcc_assert (stack_realign_drap);
9949 if (ix86_static_chain_on_stack)
9950 param_ptr_offset += UNITS_PER_WORD;
9951 if (!call_used_regs[REGNO (crtl->drap_reg)])
9952 param_ptr_offset += UNITS_PER_WORD;
9954 insn = emit_insn (gen_rtx_SET
9955 (VOIDmode, stack_pointer_rtx,
9956 gen_rtx_PLUS (Pmode,
9958 GEN_INT (-param_ptr_offset))));
9959 ix86_cfa_state->reg = stack_pointer_rtx;
9960 ix86_cfa_state->offset = param_ptr_offset;
9962 add_reg_note (insn, REG_CFA_DEF_CFA,
9963 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9964 GEN_INT (ix86_cfa_state->offset)));
9965 RTX_FRAME_RELATED_P (insn) = 1;
9967 if (!call_used_regs[REGNO (crtl->drap_reg)])
9968 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9971 /* Remove the saved static chain from the stack. The use of ECX is
9972 merely as a scratch register, not as the actual static chain. */
9973 if (ix86_static_chain_on_stack)
9977 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9978 ix86_cfa_state->offset += UNITS_PER_WORD;
9980 r = gen_rtx_REG (Pmode, CX_REG);
9981 insn = emit_insn (ix86_gen_pop1 (r));
9983 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9984 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9985 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9986 RTX_FRAME_RELATED_P (insn) = 1;
9989 /* Sibcall epilogues don't want a return instruction. */
9992 *ix86_cfa_state = cfa_state_save;
9996 if (crtl->args.pops_args && crtl->args.size)
9998 rtx popc = GEN_INT (crtl->args.pops_args);
10000 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10001 address, do explicit add, and jump indirectly to the caller. */
10003 if (crtl->args.pops_args >= 65536)
10005 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10008 /* There is no "pascal" calling convention in any 64bit ABI. */
10009 gcc_assert (!TARGET_64BIT);
10011 insn = emit_insn (gen_popsi1 (ecx));
10012 ix86_cfa_state->offset -= UNITS_PER_WORD;
10014 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10015 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10016 add_reg_note (insn, REG_CFA_REGISTER,
10017 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10018 RTX_FRAME_RELATED_P (insn) = 1;
10020 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10022 emit_jump_insn (gen_return_indirect_internal (ecx));
10025 emit_jump_insn (gen_return_pop_internal (popc));
10028 emit_jump_insn (gen_return_internal ());
10030 /* Restore the CFA state to its value from the prologue,
10031 so that it's correct for the next epilogue. */
10032 *ix86_cfa_state = cfa_state_save;
10035 /* Reset from the function's potential modifications. */
10038 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10039 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10041 if (pic_offset_table_rtx)
10042 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10044 /* Mach-O doesn't support labels at the end of objects, so if
10045 it looks like we might want one, insert a NOP. */
10047 rtx insn = get_last_insn ();
10050 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10051 insn = PREV_INSN (insn);
10055 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10056 fputs ("\tnop\n", file);
10062 /* Extract the parts of an RTL expression that is a valid memory address
10063 for an instruction. Return 0 if the structure of the address is
10064 grossly off. Return -1 if the address contains ASHIFT, so it is not
10065 strictly valid, but is still usable for computing the length of a lea instruction. */
10068 ix86_decompose_address (rtx addr, struct ix86_address *out)
10070 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10071 rtx base_reg, index_reg;
10072 HOST_WIDE_INT scale = 1;
10073 rtx scale_rtx = NULL_RTX;
10076 enum ix86_address_seg seg = SEG_DEFAULT;
10078 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10080 else if (GET_CODE (addr) == PLUS)
10082 rtx addends[4], op;
10090 addends[n++] = XEXP (op, 1);
10093 while (GET_CODE (op) == PLUS);
10098 for (i = n; i >= 0; --i)
10101 switch (GET_CODE (op))
10106 index = XEXP (op, 0);
10107 scale_rtx = XEXP (op, 1);
10113 index = XEXP (op, 0);
10114 tmp = XEXP (op, 1);
10115 if (!CONST_INT_P (tmp))
10117 scale = INTVAL (tmp);
10118 if ((unsigned HOST_WIDE_INT) scale > 3)
10120 scale = 1 << scale;
10124 if (XINT (op, 1) == UNSPEC_TP
10125 && TARGET_TLS_DIRECT_SEG_REFS
10126 && seg == SEG_DEFAULT)
10127 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10156 else if (GET_CODE (addr) == MULT)
10158 index = XEXP (addr, 0); /* index*scale */
10159 scale_rtx = XEXP (addr, 1);
10161 else if (GET_CODE (addr) == ASHIFT)
10163 /* We're called for lea too, which implements ashift on occasion. */
10164 index = XEXP (addr, 0);
10165 tmp = XEXP (addr, 1);
10166 if (!CONST_INT_P (tmp))
10168 scale = INTVAL (tmp);
10169 if ((unsigned HOST_WIDE_INT) scale > 3)
10171 scale = 1 << scale;
10175 disp = addr; /* displacement */
10177 /* Extract the integral value of scale. */
10180 if (!CONST_INT_P (scale_rtx))
10182 scale = INTVAL (scale_rtx);
10185 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10186 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10188 /* Avoid useless 0 displacement. */
10189 if (disp == const0_rtx && (base || index))
10192 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
10193 if (base_reg && index_reg && scale == 1
10194 && (index_reg == arg_pointer_rtx
10195 || index_reg == frame_pointer_rtx
10196 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10199 tmp = base, base = index, index = tmp;
10200 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10203 /* Special case: %ebp cannot be encoded as a base without a displacement.
10207 && (base_reg == hard_frame_pointer_rtx
10208 || base_reg == frame_pointer_rtx
10209 || base_reg == arg_pointer_rtx
10210 || (REG_P (base_reg)
10211 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10212 || REGNO (base_reg) == R13_REG))))
10215 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
10216 Avoid this by transforming it to [%esi+0].
10217 Reload calls address legitimization without cfun defined, so we need
10218 to test cfun for being non-NULL. */
10219 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10220 && base_reg && !index_reg && !disp
10221 && REG_P (base_reg)
10222 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
10225 /* Special case: encode reg+reg instead of reg*2. */
10226 if (!base && index && scale == 2)
10227 base = index, base_reg = index_reg, scale = 1;
10229 /* Special case: scaling cannot be encoded without base or displacement. */
10230 if (!base && !disp && index && scale != 1)
10234 out->index = index;
10236 out->scale = scale;
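/* Illustrative sketch (hypothetical helper, not part of GCC): the shape
   ix86_decompose_address recovers is the classic x86 effective address
   base + index*scale + disp, with scale limited to 1, 2, 4 or 8; an
   ASHIFT count of 0..3 maps to a scale of 1 << count.  */

static HOST_WIDE_INT ATTRIBUTE_UNUSED
example_effective_address (HOST_WIDE_INT base, HOST_WIDE_INT index,
			   HOST_WIDE_INT scale, HOST_WIDE_INT disp)
{
  return base + index * scale + disp;
}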
10242 /* Return the cost of the memory address x.
10243 For i386, it is better to use a complex address than to let gcc copy
10244 the address into a reg and make a new pseudo.  But not if the address
10245 requires two regs - that would mean more pseudos with longer
10248 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10250 struct ix86_address parts;
10252 int ok = ix86_decompose_address (x, &parts);
10256 if (parts.base && GET_CODE (parts.base) == SUBREG)
10257 parts.base = SUBREG_REG (parts.base);
10258 if (parts.index && GET_CODE (parts.index) == SUBREG)
10259 parts.index = SUBREG_REG (parts.index);
10261 /* Attempt to minimize number of registers in the address. */
10263 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10265 && (!REG_P (parts.index)
10266 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10270 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10272 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10273 && parts.base != parts.index)
10276 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10277 since its predecode logic can't detect the length of such instructions
10278 and they degenerate to vector decoded.  Increase the cost of such
10279 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
10280 to split such addresses or even refuse them outright.
10282 The following addressing modes are affected:
10283 [base+scale*index]
10284 [scale*index+disp]
10285 [base+index]
10287 The first and last cases may be avoidable by explicitly coding the zero in
10288 the memory address, but I don't have an AMD-K6 machine handy to check this
10292 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10293 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10294 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
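/* Illustrative sketch (hypothetical helper, not part of GCC): the K6
   penalty test above, as a stand-alone predicate over the presence of
   the address parts.  */

static int ATTRIBUTE_UNUSED
example_k6_vector_decoded_address_p (int has_disp, int has_base,
				     int has_index, int scale)
{
  return ((!has_disp && has_base && has_index && scale != 1)
	  || (has_disp && !has_base && has_index && scale != 1)
	  || (!has_disp && has_base && has_index && scale == 1));
}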
10300 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10301 this is used to form addresses to local data when -fPIC is in
10305 darwin_local_data_pic (rtx disp)
10307 return (GET_CODE (disp) == UNSPEC
10308 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10311 /* Determine if a given RTX is a valid constant. We already know this
10312 satisfies CONSTANT_P. */
10315 legitimate_constant_p (rtx x)
10317 switch (GET_CODE (x))
10322 if (GET_CODE (x) == PLUS)
10324 if (!CONST_INT_P (XEXP (x, 1)))
10329 if (TARGET_MACHO && darwin_local_data_pic (x))
10332 /* Only some unspecs are valid as "constants". */
10333 if (GET_CODE (x) == UNSPEC)
10334 switch (XINT (x, 1))
10337 case UNSPEC_GOTOFF:
10338 case UNSPEC_PLTOFF:
10339 return TARGET_64BIT;
10341 case UNSPEC_NTPOFF:
10342 x = XVECEXP (x, 0, 0);
10343 return (GET_CODE (x) == SYMBOL_REF
10344 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10345 case UNSPEC_DTPOFF:
10346 x = XVECEXP (x, 0, 0);
10347 return (GET_CODE (x) == SYMBOL_REF
10348 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10353 /* We must have drilled down to a symbol. */
10354 if (GET_CODE (x) == LABEL_REF)
10356 if (GET_CODE (x) != SYMBOL_REF)
10361 /* TLS symbols are never valid. */
10362 if (SYMBOL_REF_TLS_MODEL (x))
10365 /* DLLIMPORT symbols are never valid. */
10366 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10367 && SYMBOL_REF_DLLIMPORT_P (x))
10372 if (GET_MODE (x) == TImode
10373 && x != CONST0_RTX (TImode)
10379 if (!standard_sse_constant_p (x))
10386 /* Otherwise we handle everything else in the move patterns. */
10390 /* Determine if it's legal to put X into the constant pool. This
10391 is not possible for the address of thread-local symbols, which
10392 is checked above. */
10395 ix86_cannot_force_const_mem (rtx x)
10397 /* We can always put integral constants and vectors in memory. */
10398 switch (GET_CODE (x))
10408 return !legitimate_constant_p (x);
10412 /* Nonzero if the constant value X is a legitimate general operand
10413 when generating PIC code. It is given that flag_pic is on and
10414 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10417 legitimate_pic_operand_p (rtx x)
10421 switch (GET_CODE (x))
10424 inner = XEXP (x, 0);
10425 if (GET_CODE (inner) == PLUS
10426 && CONST_INT_P (XEXP (inner, 1)))
10427 inner = XEXP (inner, 0);
10429 /* Only some unspecs are valid as "constants". */
10430 if (GET_CODE (inner) == UNSPEC)
10431 switch (XINT (inner, 1))
10434 case UNSPEC_GOTOFF:
10435 case UNSPEC_PLTOFF:
10436 return TARGET_64BIT;
10438 x = XVECEXP (inner, 0, 0);
10439 return (GET_CODE (x) == SYMBOL_REF
10440 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10441 case UNSPEC_MACHOPIC_OFFSET:
10442 return legitimate_pic_address_disp_p (x);
10450 return legitimate_pic_address_disp_p (x);
10457 /* Determine if a given CONST RTX is a valid memory displacement
10461 legitimate_pic_address_disp_p (rtx disp)
10465 /* In 64bit mode we can allow direct addresses of symbols and labels
10466 when they are not dynamic symbols. */
10469 rtx op0 = disp, op1;
10471 switch (GET_CODE (disp))
10477 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10479 op0 = XEXP (XEXP (disp, 0), 0);
10480 op1 = XEXP (XEXP (disp, 0), 1);
10481 if (!CONST_INT_P (op1)
10482 || INTVAL (op1) >= 16*1024*1024
10483 || INTVAL (op1) < -16*1024*1024)
10485 if (GET_CODE (op0) == LABEL_REF)
10487 if (GET_CODE (op0) != SYMBOL_REF)
10492 /* TLS references should always be enclosed in UNSPEC. */
10493 if (SYMBOL_REF_TLS_MODEL (op0))
10495 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10496 && ix86_cmodel != CM_LARGE_PIC)
10504 if (GET_CODE (disp) != CONST)
10506 disp = XEXP (disp, 0);
10510 /* It is unsafe to allow PLUS expressions here; that would permit an
10511 arbitrary distance into the GOT tables.  We should not need these anyway. */
10512 if (GET_CODE (disp) != UNSPEC
10513 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10514 && XINT (disp, 1) != UNSPEC_GOTOFF
10515 && XINT (disp, 1) != UNSPEC_PLTOFF))
10518 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10519 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10525 if (GET_CODE (disp) == PLUS)
10527 if (!CONST_INT_P (XEXP (disp, 1)))
10529 disp = XEXP (disp, 0);
10533 if (TARGET_MACHO && darwin_local_data_pic (disp))
10536 if (GET_CODE (disp) != UNSPEC)
10539 switch (XINT (disp, 1))
10544 /* We need to check for both symbols and labels because VxWorks loads
10545 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10547 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10548 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10549 case UNSPEC_GOTOFF:
10550 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10551 While the ABI also specifies a 32bit relocation, we don't produce it in
10552 the small PIC model at all. */
10553 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10554 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10556 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10558 case UNSPEC_GOTTPOFF:
10559 case UNSPEC_GOTNTPOFF:
10560 case UNSPEC_INDNTPOFF:
10563 disp = XVECEXP (disp, 0, 0);
10564 return (GET_CODE (disp) == SYMBOL_REF
10565 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10566 case UNSPEC_NTPOFF:
10567 disp = XVECEXP (disp, 0, 0);
10568 return (GET_CODE (disp) == SYMBOL_REF
10569 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10570 case UNSPEC_DTPOFF:
10571 disp = XVECEXP (disp, 0, 0);
10572 return (GET_CODE (disp) == SYMBOL_REF
10573 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
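/* Illustrative sketch (hypothetical helper, not part of GCC): the +-16MB
   window the 64-bit code above accepts for a symbol+offset displacement.  */

static int ATTRIBUTE_UNUSED
example_pic_symbol_offset_ok (HOST_WIDE_INT offset)
{
  return offset >= -16*1024*1024 && offset < 16*1024*1024;
}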
10579 /* Recognizes RTL expressions that are valid memory addresses for an
10580 instruction. The MODE argument is the machine mode for the MEM
10581 expression that wants to use this address.
10583 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
10584 convert common non-canonical forms to canonical form so that they will
10588 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10589 rtx addr, bool strict)
10591 struct ix86_address parts;
10592 rtx base, index, disp;
10593 HOST_WIDE_INT scale;
10595 if (ix86_decompose_address (addr, &parts) <= 0)
10596 /* Decomposition failed. */
10600 index = parts.index;
10602 scale = parts.scale;
10604 /* Validate base register.
10606 Don't allow SUBREGs that span more than a word here; that can lead to spill
10607 failures when the base is one word out of a two word structure, which is
10608 represented internally as a DImode int. */
10616 else if (GET_CODE (base) == SUBREG
10617 && REG_P (SUBREG_REG (base))
10618 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10620 reg = SUBREG_REG (base);
10622 /* Base is not a register. */
10625 if (GET_MODE (base) != Pmode)
10626 /* Base is not in Pmode. */
10629 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10630 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10631 /* Base is not valid. */
10635 /* Validate index register.
10637 Don't allow SUBREGs that span more than a word here -- same as above. */
10645 else if (GET_CODE (index) == SUBREG
10646 && REG_P (SUBREG_REG (index))
10647 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10649 reg = SUBREG_REG (index);
10651 /* Index is not a register. */
10654 if (GET_MODE (index) != Pmode)
10655 /* Index is not in Pmode. */
10658 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10659 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10660 /* Index is not valid. */
10664 /* Validate scale factor. */
10668 /* Scale without index. */
10671 if (scale != 2 && scale != 4 && scale != 8)
10672 /* Scale is not a valid multiplier. */
10676 /* Validate displacement. */
10679 if (GET_CODE (disp) == CONST
10680 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10681 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10682 switch (XINT (XEXP (disp, 0), 1))
10684 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
10685 used.  While the ABI also specifies 32bit relocations, we don't produce
10686 them at all and use IP-relative addressing instead. */
10688 case UNSPEC_GOTOFF:
10689 gcc_assert (flag_pic);
10691 goto is_legitimate_pic;
10693 /* 64bit address unspec. */
10696 case UNSPEC_GOTPCREL:
10697 gcc_assert (flag_pic);
10698 goto is_legitimate_pic;
10700 case UNSPEC_GOTTPOFF:
10701 case UNSPEC_GOTNTPOFF:
10702 case UNSPEC_INDNTPOFF:
10703 case UNSPEC_NTPOFF:
10704 case UNSPEC_DTPOFF:
10708 /* Invalid address unspec. */
10712 else if (SYMBOLIC_CONST (disp)
10716 && MACHOPIC_INDIRECT
10717 && !machopic_operand_p (disp)
10723 if (TARGET_64BIT && (index || base))
10725 /* foo@dtpoff(%rX) is ok. */
10726 if (GET_CODE (disp) != CONST
10727 || GET_CODE (XEXP (disp, 0)) != PLUS
10728 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10729 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10730 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10731 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10732 /* Non-constant pic memory reference. */
10735 else if (! legitimate_pic_address_disp_p (disp))
10736 /* Displacement is an invalid pic construct. */
10739 /* This code used to verify that a symbolic pic displacement
10740 includes the pic_offset_table_rtx register.
10742 While this is a good idea, unfortunately these constructs may
10743 be created by the "adds using lea" optimization for incorrect
10752 code.  Such code is nonsensical, but results in addressing the
10753 GOT table with a pic_offset_table_rtx base.  We can't
10754 just refuse it easily, since it gets matched by the
10755 "addsi3" pattern, which later gets split to lea in the
10756 case where the output register differs from the input.  While this
10757 could be handled by a separate addsi pattern for this case
10758 that never results in lea, disabling this test seems to be the
10759 easier and correct fix for the crash. */
10761 else if (GET_CODE (disp) != LABEL_REF
10762 && !CONST_INT_P (disp)
10763 && (GET_CODE (disp) != CONST
10764 || !legitimate_constant_p (disp))
10765 && (GET_CODE (disp) != SYMBOL_REF
10766 || !legitimate_constant_p (disp)))
10767 /* Displacement is not constant. */
10769 else if (TARGET_64BIT
10770 && !x86_64_immediate_operand (disp, VOIDmode))
10771 /* Displacement is out of range. */
10775 /* Everything looks valid. */
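/* Illustrative sketch (hypothetical helper, not part of GCC): the scale
   rules just enforced -- a scale factor is meaningless without an index
   register, and the SIB byte can only encode 1, 2, 4 and 8.  */

static int ATTRIBUTE_UNUSED
example_scale_ok (int has_index, int scale)
{
  if (!has_index)
    return scale == 1;
  return scale == 1 || scale == 2 || scale == 4 || scale == 8;
}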
10779 /* Determine if a given RTX is a valid constant address. */
10782 constant_address_p (rtx x)
10784 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10787 /* Return a unique alias set for the GOT. */
10789 static alias_set_type
10790 ix86_GOT_alias_set (void)
10792 static alias_set_type set = -1;
10794 set = new_alias_set ();
10798 /* Return a legitimate reference for ORIG (an address) using the
10799 register REG. If REG is 0, a new pseudo is generated.
10801 There are two types of references that must be handled:
10803 1. Global data references must load the address from the GOT, via
10804 the PIC reg. An insn is emitted to do this load, and the reg is
10807 2. Static data references, constant pool addresses, and code labels
10808 compute the address as an offset from the GOT, whose base is in
10809 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10810 differentiate them from global data objects. The returned
10811 address is the PIC reg + an unspec constant.
10813 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10814 reg also appears in the address. */
10817 legitimize_pic_address (rtx orig, rtx reg)
10820 rtx new_rtx = orig;
10824 if (TARGET_MACHO && !TARGET_64BIT)
10827 reg = gen_reg_rtx (Pmode);
10828 /* Use the generic Mach-O PIC machinery. */
10829 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10833 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10835 else if (TARGET_64BIT
10836 && ix86_cmodel != CM_SMALL_PIC
10837 && gotoff_operand (addr, Pmode))
10840 /* This symbol may be referenced via a displacement from the PIC
10841 base address (@GOTOFF). */
10843 if (reload_in_progress)
10844 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10845 if (GET_CODE (addr) == CONST)
10846 addr = XEXP (addr, 0);
10847 if (GET_CODE (addr) == PLUS)
10849 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10851 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10854 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10855 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10857 tmpreg = gen_reg_rtx (Pmode);
10860 emit_move_insn (tmpreg, new_rtx);
10864 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10865 tmpreg, 1, OPTAB_DIRECT);
10868 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10870 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10872 /* This symbol may be referenced via a displacement from the PIC
10873 base address (@GOTOFF). */
10875 if (reload_in_progress)
10876 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10877 if (GET_CODE (addr) == CONST)
10878 addr = XEXP (addr, 0);
10879 if (GET_CODE (addr) == PLUS)
10881 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10883 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10886 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10887 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10888 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10892 emit_move_insn (reg, new_rtx);
10896 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10897 /* We can't use @GOTOFF for text labels on VxWorks;
10898 see gotoff_operand. */
10899 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10901 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10903 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10904 return legitimize_dllimport_symbol (addr, true);
10905 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10906 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10907 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10909 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10910 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10914 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10916 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10917 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10918 new_rtx = gen_const_mem (Pmode, new_rtx);
10919 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10922 reg = gen_reg_rtx (Pmode);
10923 /* Use gen_movsi directly; otherwise the address is loaded
10924 into a register for CSE.  We don't want to CSE these addresses;
10925 instead we CSE addresses from the GOT table, so skip this. */
10926 emit_insn (gen_movsi (reg, new_rtx));
10931 /* This symbol must be referenced via a load from the
10932 Global Offset Table (@GOT). */
10934 if (reload_in_progress)
10935 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10936 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10937 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10939 new_rtx = force_reg (Pmode, new_rtx);
10940 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10941 new_rtx = gen_const_mem (Pmode, new_rtx);
10942 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10945 reg = gen_reg_rtx (Pmode);
10946 emit_move_insn (reg, new_rtx);
10952 if (CONST_INT_P (addr)
10953 && !x86_64_immediate_operand (addr, VOIDmode))
10957 emit_move_insn (reg, addr);
10961 new_rtx = force_reg (Pmode, addr);
10963 else if (GET_CODE (addr) == CONST)
10965 addr = XEXP (addr, 0);
10967 /* We must match stuff we generate before. Assume the only
10968 unspecs that can get here are ours. Not that we could do
10969 anything with them anyway.... */
10970 if (GET_CODE (addr) == UNSPEC
10971 || (GET_CODE (addr) == PLUS
10972 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10974 gcc_assert (GET_CODE (addr) == PLUS);
10976 if (GET_CODE (addr) == PLUS)
10978 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10980 /* Check first to see if this is a constant offset from a @GOTOFF
10981 symbol reference. */
10982 if (gotoff_operand (op0, Pmode)
10983 && CONST_INT_P (op1))
10987 if (reload_in_progress)
10988 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10989 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10991 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10992 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10993 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10997 emit_move_insn (reg, new_rtx);
11003 if (INTVAL (op1) < -16*1024*1024
11004 || INTVAL (op1) >= 16*1024*1024)
11006 if (!x86_64_immediate_operand (op1, Pmode))
11007 op1 = force_reg (Pmode, op1);
11008 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11014 base = legitimize_pic_address (XEXP (addr, 0), reg);
11015 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11016 base == reg ? NULL_RTX : reg);
11018 if (CONST_INT_P (new_rtx))
11019 new_rtx = plus_constant (base, INTVAL (new_rtx));
11022 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11024 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11025 new_rtx = XEXP (new_rtx, 1);
11027 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
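/* Illustrative sketch (not called anywhere): the @GOTOFF form built in the
   32-bit branches above -- the symbol's offset from the GOT base, wrapped
   in an UNSPEC and added to the PIC register.  */

static rtx ATTRIBUTE_UNUSED
example_gotoff_address (rtx symbol)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_GOTOFF);
  off = gen_rtx_CONST (Pmode, off);
  return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, off);
}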
11035 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11038 get_thread_pointer (int to_reg)
11042 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11046 reg = gen_reg_rtx (Pmode);
11047 insn = gen_rtx_SET (VOIDmode, reg, tp);
11048 insn = emit_insn (insn);
11053 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11054 false if we expect this to be used for a memory address and true if
11055 we expect to load the address into a register. */
11058 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11060 rtx dest, base, off, pic, tp;
11065 case TLS_MODEL_GLOBAL_DYNAMIC:
11066 dest = gen_reg_rtx (Pmode);
11067 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11069 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11071 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11074 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11075 insns = get_insns ();
11078 RTL_CONST_CALL_P (insns) = 1;
11079 emit_libcall_block (insns, dest, rax, x);
11081 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11082 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11084 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11086 if (TARGET_GNU2_TLS)
11088 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11090 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11094 case TLS_MODEL_LOCAL_DYNAMIC:
11095 base = gen_reg_rtx (Pmode);
11096 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11098 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11100 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11103 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11104 insns = get_insns ();
11107 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11108 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11109 RTL_CONST_CALL_P (insns) = 1;
11110 emit_libcall_block (insns, base, rax, note);
11112 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11113 emit_insn (gen_tls_local_dynamic_base_64 (base));
11115 emit_insn (gen_tls_local_dynamic_base_32 (base));
11117 if (TARGET_GNU2_TLS)
11119 rtx x = ix86_tls_module_base ();
11121 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11122 gen_rtx_MINUS (Pmode, x, tp));
11125 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11126 off = gen_rtx_CONST (Pmode, off);
11128 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11130 if (TARGET_GNU2_TLS)
11132 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11134 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11139 case TLS_MODEL_INITIAL_EXEC:
11143 type = UNSPEC_GOTNTPOFF;
11147 if (reload_in_progress)
11148 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11149 pic = pic_offset_table_rtx;
11150 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11152 else if (!TARGET_ANY_GNU_TLS)
11154 pic = gen_reg_rtx (Pmode);
11155 emit_insn (gen_set_got (pic));
11156 type = UNSPEC_GOTTPOFF;
11161 type = UNSPEC_INDNTPOFF;
11164 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11165 off = gen_rtx_CONST (Pmode, off);
11167 off = gen_rtx_PLUS (Pmode, pic, off);
11168 off = gen_const_mem (Pmode, off);
11169 set_mem_alias_set (off, ix86_GOT_alias_set ());
11171 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11173 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11174 off = force_reg (Pmode, off);
11175 return gen_rtx_PLUS (Pmode, base, off);
11179 base = get_thread_pointer (true);
11180 dest = gen_reg_rtx (Pmode);
11181 emit_insn (gen_subsi3 (dest, base, off));
11185 case TLS_MODEL_LOCAL_EXEC:
11186 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11187 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11188 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11189 off = gen_rtx_CONST (Pmode, off);
11191 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11193 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11194 return gen_rtx_PLUS (Pmode, base, off);
11198 base = get_thread_pointer (true);
11199 dest = gen_reg_rtx (Pmode);
11200 emit_insn (gen_subsi3 (dest, base, off));
11205 gcc_unreachable ();
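/* Illustrative sketch (not called anywhere): the simplest model handled
   above, local exec, where the final address is just the thread pointer
   plus a link-time constant offset; mirrors the TLS_MODEL_LOCAL_EXEC
   branch for TARGET_64BIT || TARGET_ANY_GNU_TLS.  */

static rtx ATTRIBUTE_UNUSED
example_local_exec_address (rtx symbol)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_NTPOFF);
  off = gen_rtx_CONST (Pmode, off);
  return gen_rtx_PLUS (Pmode, get_thread_pointer (true), off);
}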
11211 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11214 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11215 htab_t dllimport_map;
11218 get_dllimport_decl (tree decl)
11220 struct tree_map *h, in;
11223 const char *prefix;
11224 size_t namelen, prefixlen;
11229 if (!dllimport_map)
11230 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11232 in.hash = htab_hash_pointer (decl);
11233 in.base.from = decl;
11234 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11235 h = (struct tree_map *) *loc;
11239 *loc = h = ggc_alloc_tree_map ();
11241 h->base.from = decl;
11242 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11243 VAR_DECL, NULL, ptr_type_node);
11244 DECL_ARTIFICIAL (to) = 1;
11245 DECL_IGNORED_P (to) = 1;
11246 DECL_EXTERNAL (to) = 1;
11247 TREE_READONLY (to) = 1;
11249 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11250 name = targetm.strip_name_encoding (name);
11251 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11252 ? "*__imp_" : "*__imp__";
11253 namelen = strlen (name);
11254 prefixlen = strlen (prefix);
11255 imp_name = (char *) alloca (namelen + prefixlen + 1);
11256 memcpy (imp_name, prefix, prefixlen);
11257 memcpy (imp_name + prefixlen, name, namelen + 1);
11259 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11260 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11261 SET_SYMBOL_REF_DECL (rtl, to);
11262 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11264 rtl = gen_const_mem (Pmode, rtl);
11265 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11267 SET_DECL_RTL (to, rtl);
11268 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11273 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11274 true if we require the result to be a register. */
11277 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11282 gcc_assert (SYMBOL_REF_DECL (symbol));
11283 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11285 x = DECL_RTL (imp_decl);
11287 x = force_reg (Pmode, x);
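/* Illustrative sketch (hypothetical helper, not part of GCC): how the
   "*__imp_" indirection name above is assembled, e.g. "foo" becomes
   "*__imp__foo" when the underscored prefix applies.  */

static void ATTRIBUTE_UNUSED
example_build_imp_name (char *buf, size_t bufsize, const char *name,
			int use_underscore)
{
  const char *prefix = use_underscore ? "*__imp__" : "*__imp_";
  size_t prefixlen = strlen (prefix);
  size_t namelen = strlen (name);

  if (prefixlen + namelen + 1 <= bufsize)
    {
      memcpy (buf, prefix, prefixlen);
      memcpy (buf + prefixlen, name, namelen + 1);
    }
}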
11291 /* Try machine-dependent ways of modifying an illegitimate address
11292 to be legitimate. If we find one, return the new, valid address.
11293 This macro is used in only one place: `memory_address' in explow.c.
11295 OLDX is the address as it was before break_out_memory_refs was called.
11296 In some cases it is useful to look at this to decide what needs to be done.
11298 It is always safe for this macro to do nothing. It exists to recognize
11299 opportunities to optimize the output.
11301 For the 80386, we handle X+REG by loading X into a register R and
11302 using R+REG. R will go in a general reg and indexing will be used.
11303 However, if REG is a broken-out memory address or multiplication,
11304 nothing needs to be done because REG can certainly go in a general reg.
11306 When -fpic is used, special handling is needed for symbolic references.
11307 See comments by legitimize_pic_address in i386.c for details. */
11310 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11311 enum machine_mode mode)
11316 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11318 return legitimize_tls_address (x, (enum tls_model) log, false);
11319 if (GET_CODE (x) == CONST
11320 && GET_CODE (XEXP (x, 0)) == PLUS
11321 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11322 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11324 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11325 (enum tls_model) log, false);
11326 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11329 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11331 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11332 return legitimize_dllimport_symbol (x, true);
11333 if (GET_CODE (x) == CONST
11334 && GET_CODE (XEXP (x, 0)) == PLUS
11335 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11336 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11338 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11339 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11343 if (flag_pic && SYMBOLIC_CONST (x))
11344 return legitimize_pic_address (x, 0);
11346 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11347 if (GET_CODE (x) == ASHIFT
11348 && CONST_INT_P (XEXP (x, 1))
11349 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11352 log = INTVAL (XEXP (x, 1));
11353 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11354 GEN_INT (1 << log));
11357 if (GET_CODE (x) == PLUS)
11359 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11361 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11362 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11363 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11366 log = INTVAL (XEXP (XEXP (x, 0), 1));
11367 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11368 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11369 GEN_INT (1 << log));
11372 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11373 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11374 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11377 log = INTVAL (XEXP (XEXP (x, 1), 1));
11378 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11379 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11380 GEN_INT (1 << log));
11383 /* Put multiply first if it isn't already. */
11384 if (GET_CODE (XEXP (x, 1)) == MULT)
11386 rtx tmp = XEXP (x, 0);
11387 XEXP (x, 0) = XEXP (x, 1);
11392 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11393 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11394 created by virtual register instantiation, register elimination, and
11395 similar optimizations. */
11396 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11399 x = gen_rtx_PLUS (Pmode,
11400 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11401 XEXP (XEXP (x, 1), 0)),
11402 XEXP (XEXP (x, 1), 1));
11406 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11407 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11408 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11410 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11411 && CONSTANT_P (XEXP (x, 1)))
11414 rtx other = NULL_RTX;
11416 if (CONST_INT_P (XEXP (x, 1)))
11418 constant = XEXP (x, 1);
11419 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11421 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11423 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11424 other = XEXP (x, 1);
11432 x = gen_rtx_PLUS (Pmode,
11433 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11434 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11435 plus_constant (other, INTVAL (constant)));
11439 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11442 if (GET_CODE (XEXP (x, 0)) == MULT)
11445 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11448 if (GET_CODE (XEXP (x, 1)) == MULT)
11451 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11455 && REG_P (XEXP (x, 1))
11456 && REG_P (XEXP (x, 0)))
11459 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11462 x = legitimize_pic_address (x, 0);
11465 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11468 if (REG_P (XEXP (x, 0)))
11470 rtx temp = gen_reg_rtx (Pmode);
11471 rtx val = force_operand (XEXP (x, 1), temp);
11473 emit_move_insn (temp, val);
11475 XEXP (x, 1) = temp;
11479 else if (REG_P (XEXP (x, 1)))
11481 rtx temp = gen_reg_rtx (Pmode);
11482 rtx val = force_operand (XEXP (x, 0), temp);
11484 emit_move_insn (temp, val);
11486 XEXP (x, 0) = temp;
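/* Illustrative sketch (hypothetical helper, not part of GCC): the
   shift-to-multiply canonicalization used above -- (x << count) for count
   0..3 becomes x * (1 << count), matching the scaled-index form the
   addressing code expects.  */

static int ATTRIBUTE_UNUSED
example_shift_count_to_scale (int count)
{
  /* Returns 0 for counts that cannot be encoded as an address scale.  */
  return count >= 0 && count < 4 ? 1 << count : 0;
}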
11494 /* Print an integer constant expression in assembler syntax. Addition
11495 and subtraction are the only arithmetic that may appear in these
11496 expressions. FILE is the stdio stream to write to, X is the rtx, and
11497 CODE is the operand print code from the output string. */
11500 output_pic_addr_const (FILE *file, rtx x, int code)
11504 switch (GET_CODE (x))
11507 gcc_assert (flag_pic);
11512 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11513 output_addr_const (file, x);
11516 const char *name = XSTR (x, 0);
11518 /* Mark the decl as referenced so that cgraph will
11519 output the function. */
11520 if (SYMBOL_REF_DECL (x))
11521 mark_decl_referenced (SYMBOL_REF_DECL (x));
11524 if (MACHOPIC_INDIRECT
11525 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11526 name = machopic_indirection_name (x, /*stub_p=*/true);
11528 assemble_name (file, name);
11530 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11531 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11532 fputs ("@PLT", file);
11539 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11540 assemble_name (asm_out_file, buf);
11544 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11548 /* This used to output parentheses around the expression,
11549 but that does not work on the 386 (either ATT or BSD assembler). */
11550 output_pic_addr_const (file, XEXP (x, 0), code);
11554 if (GET_MODE (x) == VOIDmode)
11556 /* We can use %d if the number is <32 bits and positive. */
11557 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11558 fprintf (file, "0x%lx%08lx",
11559 (unsigned long) CONST_DOUBLE_HIGH (x),
11560 (unsigned long) CONST_DOUBLE_LOW (x));
11562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11565 /* We can't handle floating point constants;
11566 TARGET_PRINT_OPERAND must handle them. */
11567 output_operand_lossage ("floating constant misused");
11571 /* Some assemblers need integer constants to appear first. */
11572 if (CONST_INT_P (XEXP (x, 0)))
11574 output_pic_addr_const (file, XEXP (x, 0), code);
11576 output_pic_addr_const (file, XEXP (x, 1), code);
11580 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11581 output_pic_addr_const (file, XEXP (x, 1), code);
11583 output_pic_addr_const (file, XEXP (x, 0), code);
11589 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11590 output_pic_addr_const (file, XEXP (x, 0), code);
11592 output_pic_addr_const (file, XEXP (x, 1), code);
11594 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11598 gcc_assert (XVECLEN (x, 0) == 1);
11599 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11600 switch (XINT (x, 1))
11603 fputs ("@GOT", file);
11605 case UNSPEC_GOTOFF:
11606 fputs ("@GOTOFF", file);
11608 case UNSPEC_PLTOFF:
11609 fputs ("@PLTOFF", file);
11611 case UNSPEC_GOTPCREL:
11612 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11613 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11615 case UNSPEC_GOTTPOFF:
11616 /* FIXME: This might be @TPOFF in Sun ld too. */
11617 fputs ("@gottpoff", file);
11620 fputs ("@tpoff", file);
11622 case UNSPEC_NTPOFF:
11624 fputs ("@tpoff", file);
11626 fputs ("@ntpoff", file);
11628 case UNSPEC_DTPOFF:
11629 fputs ("@dtpoff", file);
11631 case UNSPEC_GOTNTPOFF:
11633 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11634 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11636 fputs ("@gotntpoff", file);
11638 case UNSPEC_INDNTPOFF:
11639 fputs ("@indntpoff", file);
11642 case UNSPEC_MACHOPIC_OFFSET:
11644 machopic_output_function_base_name (file);
11648 output_operand_lossage ("invalid UNSPEC as operand");
11654 output_operand_lossage ("invalid expression as operand");
11658 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11659 We need to emit DTP-relative relocations. */
11661 static void ATTRIBUTE_UNUSED
11662 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11664 fputs (ASM_LONG, file);
11665 output_addr_const (file, x);
11666 fputs ("@dtpoff", file);
11672 fputs (", 0", file);
11675 gcc_unreachable ();
11679 /* Return true if X is a representation of the PIC register. This copes
11680 with calls from ix86_find_base_term, where the register might have
11681 been replaced by a cselib value. */
11684 ix86_pic_register_p (rtx x)
11686 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11687 return (pic_offset_table_rtx
11688 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11690 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11693 /* In the name of slightly smaller debug output, and to cater to
11694 general assembler lossage, recognize PIC+GOTOFF and turn it back
11695 into a direct symbol reference.
11697 On Darwin, this is necessary to avoid a crash, because Darwin
11698 has a different PIC label for each routine but the DWARF debugging
11699 information is not associated with any particular routine, so it's
11700 necessary to remove references to the PIC label from RTL stored by
11701 the DWARF output code. */
11704 ix86_delegitimize_address (rtx x)
11706 rtx orig_x = delegitimize_mem_from_attrs (x);
11707 /* addend is NULL or some rtx if x is something+GOTOFF where
11708 something doesn't include the PIC register. */
11709 rtx addend = NULL_RTX;
11710 /* reg_addend is NULL or a multiple of some register. */
11711 rtx reg_addend = NULL_RTX;
11712 /* const_addend is NULL or a const_int. */
11713 rtx const_addend = NULL_RTX;
11714 /* This is the result, or NULL. */
11715 rtx result = NULL_RTX;
11724 if (GET_CODE (x) != CONST
11725 || GET_CODE (XEXP (x, 0)) != UNSPEC
11726 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11727 || !MEM_P (orig_x))
11729 x = XVECEXP (XEXP (x, 0), 0, 0);
11730 if (GET_MODE (orig_x) != Pmode)
11731 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11735 if (GET_CODE (x) != PLUS
11736 || GET_CODE (XEXP (x, 1)) != CONST)
11739 if (ix86_pic_register_p (XEXP (x, 0)))
11740 /* %ebx + GOT/GOTOFF */
11742 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11744 /* %ebx + %reg * scale + GOT/GOTOFF */
11745 reg_addend = XEXP (x, 0);
11746 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11747 reg_addend = XEXP (reg_addend, 1);
11748 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11749 reg_addend = XEXP (reg_addend, 0);
11752 reg_addend = NULL_RTX;
11753 addend = XEXP (x, 0);
11757 addend = XEXP (x, 0);
11759 x = XEXP (XEXP (x, 1), 0);
11760 if (GET_CODE (x) == PLUS
11761 && CONST_INT_P (XEXP (x, 1)))
11763 const_addend = XEXP (x, 1);
11767 if (GET_CODE (x) == UNSPEC
11768 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11769 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11770 result = XVECEXP (x, 0, 0);
11772 if (TARGET_MACHO && darwin_local_data_pic (x)
11773 && !MEM_P (orig_x))
11774 result = XVECEXP (x, 0, 0);
11780 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11782 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11785 /* If the rest of the original X doesn't involve the PIC register, add
11786 the addend and subtract pic_offset_table_rtx.  This can happen, e.g. for
11788 leal (%ebx, %ecx, 4), %ecx
11790 movl foo@GOTOFF(%ecx), %edx
11791 in which case we return (%ecx - %ebx) + foo. */
11792 if (pic_offset_table_rtx)
11793 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11794 pic_offset_table_rtx),
11799 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11800 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11804 /* If X is a machine specific address (i.e. a symbol or label being
11805 referenced as a displacement from the GOT implemented using an
11806 UNSPEC), then return the base term. Otherwise return X. */
11809 ix86_find_base_term (rtx x)
11815 if (GET_CODE (x) != CONST)
11817 term = XEXP (x, 0);
11818 if (GET_CODE (term) == PLUS
11819 && (CONST_INT_P (XEXP (term, 1))
11820 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11821 term = XEXP (term, 0);
11822 if (GET_CODE (term) != UNSPEC
11823 || XINT (term, 1) != UNSPEC_GOTPCREL)
11826 return XVECEXP (term, 0, 0);
11829 return ix86_delegitimize_address (x);
11833 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11834 int fp, FILE *file)
11836 const char *suffix;
11838 if (mode == CCFPmode || mode == CCFPUmode)
11840 code = ix86_fp_compare_code_to_integer (code);
11844 code = reverse_condition (code);
11895 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11899 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11900 Those same assemblers have the same but opposite lossage on cmov. */
11901 if (mode == CCmode)
11902 suffix = fp ? "nbe" : "a";
11903 else if (mode == CCCmode)
11906 gcc_unreachable ();
11922 gcc_unreachable ();
11926 gcc_assert (mode == CCmode || mode == CCCmode);
11943 gcc_unreachable ();
11947 /* ??? As above. */
11948 gcc_assert (mode == CCmode || mode == CCCmode);
11949 suffix = fp ? "nb" : "ae";
11952 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11956 /* ??? As above. */
11957 if (mode == CCmode)
11959 else if (mode == CCCmode)
11960 suffix = fp ? "nb" : "ae";
11962 gcc_unreachable ();
11965 suffix = fp ? "u" : "p";
11968 suffix = fp ? "nu" : "np";
11971 gcc_unreachable ();
11973 fputs (suffix, file);
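/* Illustrative sketch (hypothetical helper, simplified from the logic
   above): the usual integer condition suffixes -- signed comparisons use
   the g/l family, unsigned ones the a/b family.  */

static const char * ATTRIBUTE_UNUSED
example_int_condition_suffix (enum rtx_code code)
{
  switch (code)
    {
    case EQ:  return "e";
    case NE:  return "ne";
    case GT:  return "g";
    case GE:  return "ge";
    case LT:  return "l";
    case LE:  return "le";
    case GTU: return "a";
    case GEU: return "ae";
    case LTU: return "b";
    case LEU: return "be";
    default:  return NULL;
    }
}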
11976 /* Print the name of register X to FILE based on its machine mode and number.
11977 If CODE is 'w', pretend the mode is HImode.
11978 If CODE is 'b', pretend the mode is QImode.
11979 If CODE is 'k', pretend the mode is SImode.
11980 If CODE is 'q', pretend the mode is DImode.
11981 If CODE is 'x', pretend the mode is V4SFmode.
11982 If CODE is 't', pretend the mode is V8SFmode.
11983 If CODE is 'h', pretend the reg is the 'high' byte register.
11984 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11985 If CODE is 'd', duplicate the operand for AVX instruction.
11989 print_reg (rtx x, int code, FILE *file)
11992 bool duplicated = code == 'd' && TARGET_AVX;
11994 gcc_assert (x == pc_rtx
11995 || (REGNO (x) != ARG_POINTER_REGNUM
11996 && REGNO (x) != FRAME_POINTER_REGNUM
11997 && REGNO (x) != FLAGS_REG
11998 && REGNO (x) != FPSR_REG
11999 && REGNO (x) != FPCR_REG));
12001 if (ASSEMBLER_DIALECT == ASM_ATT)
12006 gcc_assert (TARGET_64BIT);
12007 fputs ("rip", file);
12011 if (code == 'w' || MMX_REG_P (x))
12013 else if (code == 'b')
12015 else if (code == 'k')
12017 else if (code == 'q')
12019 else if (code == 'y')
12021 else if (code == 'h')
12023 else if (code == 'x')
12025 else if (code == 't')
12028 code = GET_MODE_SIZE (GET_MODE (x));
12030 /* Irritatingly, the AMD extended registers use a different naming convention
12031 from the normal registers. */
12032 if (REX_INT_REG_P (x))
12034 gcc_assert (TARGET_64BIT);
12038 error ("extended registers have no high halves");
12041 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12044 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12047 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12050 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12053 error ("unsupported operand size for extended register");
12063 if (STACK_TOP_P (x))
12072 if (! ANY_FP_REG_P (x))
12073 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12078 reg = hi_reg_name[REGNO (x)];
12081 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12083 reg = qi_reg_name[REGNO (x)];
12086 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12088 reg = qi_high_reg_name[REGNO (x)];
12093 gcc_assert (!duplicated);
12095 fputs (hi_reg_name[REGNO (x)] + 1, file);
12100 gcc_unreachable ();
12106 if (ASSEMBLER_DIALECT == ASM_ATT)
12107 fprintf (file, ", %%%s", reg);
12109 fprintf (file, ", %s", reg);
12113 /* Locate some local-dynamic symbol still in use by this function
12114 so that we can print its name in some tls_local_dynamic_base
12118 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12122 if (GET_CODE (x) == SYMBOL_REF
12123 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12125 cfun->machine->some_ld_name = XSTR (x, 0);
12132 static const char *
12133 get_some_local_dynamic_name (void)
12137 if (cfun->machine->some_ld_name)
12138 return cfun->machine->some_ld_name;
12140 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12141 if (NONDEBUG_INSN_P (insn)
12142 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12143 return cfun->machine->some_ld_name;
12148 /* Meaning of CODE:
12149 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12150 C -- print opcode suffix for set/cmov insn.
12151 c -- like C, but print reversed condition
12152 F,f -- likewise, but for floating-point.
12153 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12155 R -- print the prefix for register names.
12156 z -- print the opcode suffix for the size of the current operand.
12157 Z -- likewise, with special suffixes for x87 instructions.
12158 * -- print a star (in certain assembler syntax)
12159 A -- print an absolute memory reference.
12160 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12161 s -- print a shift double count, followed by the assembler's argument
12163 b -- print the QImode name of the register for the indicated operand.
12164 %b0 would print %al if operands[0] is reg 0.
12165 w -- likewise, print the HImode name of the register.
12166 k -- likewise, print the SImode name of the register.
12167 q -- likewise, print the DImode name of the register.
12168 x -- likewise, print the V4SFmode name of the register.
12169 t -- likewise, print the V8SFmode name of the register.
12170 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12171 y -- print "st(0)" instead of "st" as a register.
12172 d -- print duplicated register operand for AVX instruction.
12173 D -- print condition for SSE cmp instruction.
12174 P -- if PIC, print an @PLT suffix.
12175 X -- don't print any sort of PIC '@' suffix for a symbol.
12176 & -- print some in-use local-dynamic symbol name.
12177 H -- print a memory address offset by 8; used for sse high-parts
12178 Y -- print condition for XOP pcom* instruction.
12179 + -- print a branch hint as 'cs' or 'ds' prefix
12180 ; -- print a semicolon (after prefixes, due to a bug in older gas).
12184 ix86_print_operand (FILE *file, rtx x, int code)
12191 if (ASSEMBLER_DIALECT == ASM_ATT)
12197 const char *name = get_some_local_dynamic_name ();
12199 output_operand_lossage ("'%%&' used without any "
12200 "local dynamic TLS references");
12202 assemble_name (file, name);
12207 switch (ASSEMBLER_DIALECT)
12214 /* Intel syntax. For absolute addresses, registers should not
12215 be surrounded by braces. */
12219 ix86_print_operand (file, x, 0);
12226 gcc_unreachable ();
12229 ix86_print_operand (file, x, 0);
12234 if (ASSEMBLER_DIALECT == ASM_ATT)
12239 if (ASSEMBLER_DIALECT == ASM_ATT)
12244 if (ASSEMBLER_DIALECT == ASM_ATT)
12249 if (ASSEMBLER_DIALECT == ASM_ATT)
12254 if (ASSEMBLER_DIALECT == ASM_ATT)
12259 if (ASSEMBLER_DIALECT == ASM_ATT)
12264 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12266 /* Opcodes don't get size suffixes if using Intel opcodes. */
12267 if (ASSEMBLER_DIALECT == ASM_INTEL)
12270 switch (GET_MODE_SIZE (GET_MODE (x)))
12289 output_operand_lossage
12290 ("invalid operand size for operand code '%c'", code);
12295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12297 (0, "non-integer operand used with operand code '%c'", code);
12301 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12302 if (ASSEMBLER_DIALECT == ASM_INTEL)
12305 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12307 switch (GET_MODE_SIZE (GET_MODE (x)))
12310 #ifdef HAVE_AS_IX86_FILDS
12320 #ifdef HAVE_AS_IX86_FILDQ
12323 fputs ("ll", file);
12331 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12333 /* 387 opcodes don't get size suffixes
12334 if the operands are registers. */
12335 if (STACK_REG_P (x))
12338 switch (GET_MODE_SIZE (GET_MODE (x)))
12359 output_operand_lossage
12360 ("invalid operand type used with operand code '%c'", code);
12364 output_operand_lossage
12365 ("invalid operand size for operand code '%c'", code);
12382 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12384 ix86_print_operand (file, x, 0);
12385 fputs (", ", file);
12390 /* A little bit of brain damage here: the SSE compare instructions
12391 use completely different names for the comparisons than the
12392 fp conditional moves do. */
12395 switch (GET_CODE (x))
12398 fputs ("eq", file);
12401 fputs ("eq_us", file);
12404 fputs ("lt", file);
12407 fputs ("nge", file);
12410 fputs ("le", file);
12413 fputs ("ngt", file);
12416 fputs ("unord", file);
12419 fputs ("neq", file);
12422 fputs ("neq_oq", file);
12425 fputs ("ge", file);
12428 fputs ("nlt", file);
12431 fputs ("gt", file);
12434 fputs ("nle", file);
12437 fputs ("ord", file);
12440 output_operand_lossage ("operand is not a condition code, "
12441 "invalid operand code 'D'");
12447 switch (GET_CODE (x))
12451 fputs ("eq", file);
12455 fputs ("lt", file);
12459 fputs ("le", file);
12462 fputs ("unord", file);
12466 fputs ("neq", file);
12470 fputs ("nlt", file);
12474 fputs ("nle", file);
12477 fputs ("ord", file);
12480 output_operand_lossage ("operand is not a condition code, "
12481 "invalid operand code 'D'");
12487 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12488 if (ASSEMBLER_DIALECT == ASM_ATT)
12490 switch (GET_MODE (x))
12492 case HImode: putc ('w', file); break;
12494 case SFmode: putc ('l', file); break;
12496 case DFmode: putc ('q', file); break;
12497 default: gcc_unreachable ();
12504 if (!COMPARISON_P (x))
12506 output_operand_lossage ("operand is neither a constant nor a "
12507 "condition code, invalid operand code "
12511 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12514 if (!COMPARISON_P (x))
12516 output_operand_lossage ("operand is neither a constant nor a "
12517 "condition code, invalid operand code "
12521 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12522 if (ASSEMBLER_DIALECT == ASM_ATT)
12525 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12528 /* Like above, but reverse condition */
12530 /* Check to see if argument to %c is really a constant
12531 and not a condition code which needs to be reversed. */
12532 if (!COMPARISON_P (x))
12534 output_operand_lossage ("operand is neither a constant nor a "
12535 "condition code, invalid operand "
12539 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12542 if (!COMPARISON_P (x))
12544 output_operand_lossage ("operand is neither a constant nor a "
12545 "condition code, invalid operand "
12549 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12550 if (ASSEMBLER_DIALECT == ASM_ATT)
12553 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12557 /* It doesn't actually matter what mode we use here, as we're
12558 only going to use this for printing. */
12559 x = adjust_address_nv (x, DImode, 8);
12567 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12570 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12573 int pred_val = INTVAL (XEXP (x, 0));
12575 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12576 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12578 int taken = pred_val > REG_BR_PROB_BASE / 2;
12579 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12581 /* Emit hints only in the case where the default branch prediction
12582 heuristics would fail. */
12583 if (taken != cputaken)
12585 /* We use 3e (DS) prefix for taken branches and
12586 2e (CS) prefix for not taken branches. */
12588 fputs ("ds ; ", file);
12590 fputs ("cs ; ", file);
12598 switch (GET_CODE (x))
12601 fputs ("neq", file);
12604 fputs ("eq", file);
12608 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12612 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12616 fputs ("le", file);
12620 fputs ("lt", file);
12623 fputs ("unord", file);
12626 fputs ("ord", file);
12629 fputs ("ueq", file);
12632 fputs ("nlt", file);
12635 fputs ("nle", file);
12638 fputs ("ule", file);
12641 fputs ("ult", file);
12644 fputs ("une", file);
12647 output_operand_lossage ("operand is not a condition code, "
12648 "invalid operand code 'Y'");
12654 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12660 output_operand_lossage ("invalid operand code '%c'", code);
12665 print_reg (x, code, file);
12667 else if (MEM_P (x))
12669 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12670 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12671 && GET_MODE (x) != BLKmode)
12674 switch (GET_MODE_SIZE (GET_MODE (x)))
12676 case 1: size = "BYTE"; break;
12677 case 2: size = "WORD"; break;
12678 case 4: size = "DWORD"; break;
12679 case 8: size = "QWORD"; break;
12680 case 12: size = "TBYTE"; break;
12682 if (GET_MODE (x) == XFmode)
12687 case 32: size = "YMMWORD"; break;
12689 gcc_unreachable ();
12692 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12695 else if (code == 'w')
12697 else if (code == 'k')
12700 fputs (size, file);
12701 fputs (" PTR ", file);
12705 /* Avoid (%rip) for call operands. */
12706 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12707 && !CONST_INT_P (x))
12708 output_addr_const (file, x);
12709 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12710 output_operand_lossage ("invalid constraints for operand");
12712 output_address (x);
12715 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12720 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12721 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12723 if (ASSEMBLER_DIALECT == ASM_ATT)
12725 fprintf (file, "0x%08lx", (long unsigned int) l);
12728 /* These float cases don't actually occur as immediate operands. */
12729 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12733 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12734 fputs (dstr, file);
12737 else if (GET_CODE (x) == CONST_DOUBLE
12738 && GET_MODE (x) == XFmode)
12742 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12743 fputs (dstr, file);
12748 /* We have patterns that allow zero sets of memory, for instance.
12749 In 64-bit mode, we should probably support all 8-byte vectors,
12750 since we can in fact encode that into an immediate. */
12751 if (GET_CODE (x) == CONST_VECTOR)
12753 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12759 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12761 if (ASSEMBLER_DIALECT == ASM_ATT)
12764 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12765 || GET_CODE (x) == LABEL_REF)
12767 if (ASSEMBLER_DIALECT == ASM_ATT)
12770 fputs ("OFFSET FLAT:", file);
12773 if (CONST_INT_P (x))
12774 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12776 output_pic_addr_const (file, x, code);
12778 output_addr_const (file, x);
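/* Illustrative sketch (hypothetical helper, not part of GCC): what the
   b/w/k/q/h operand codes above select for one register -- the different
   width names of the accumulator.  */

static const char * ATTRIBUTE_UNUSED
example_ax_register_name (int code)
{
  switch (code)
    {
    case 'b': return "al";
    case 'h': return "ah";
    case 'w': return "ax";
    case 'k': return "eax";
    case 'q': return "rax";
    default:  return NULL;
    }
}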
12783 ix86_print_operand_punct_valid_p (unsigned char code)
12785 return (code == '*' || code == '+' || code == '&' || code == ';');
12788 /* Print a memory operand whose address is ADDR. */
12791 ix86_print_operand_address (FILE *file, rtx addr)
12793 struct ix86_address parts;
12794 rtx base, index, disp;
12796 int ok = ix86_decompose_address (addr, &parts);
12801 index = parts.index;
12803 scale = parts.scale;
12811 if (ASSEMBLER_DIALECT == ASM_ATT)
12813 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12816 gcc_unreachable ();
12819 /* Use one byte shorter RIP relative addressing for 64bit mode. */
12820 if (TARGET_64BIT && !base && !index)
12824 if (GET_CODE (disp) == CONST
12825 && GET_CODE (XEXP (disp, 0)) == PLUS
12826 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12827 symbol = XEXP (XEXP (disp, 0), 0);
12829 if (GET_CODE (symbol) == LABEL_REF
12830 || (GET_CODE (symbol) == SYMBOL_REF
12831 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12834 if (!base && !index)
12836 /* A displacement-only address requires special attention. */
12838 if (CONST_INT_P (disp))
12840 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12841 fputs ("ds:", file);
12842 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12845 output_pic_addr_const (file, disp, 0);
12847 output_addr_const (file, disp);
12851 if (ASSEMBLER_DIALECT == ASM_ATT)
12856 output_pic_addr_const (file, disp, 0);
12857 else if (GET_CODE (disp) == LABEL_REF)
12858 output_asm_label (disp);
12860 output_addr_const (file, disp);
12865 print_reg (base, 0, file);
12869 print_reg (index, 0, file);
12871 fprintf (file, ",%d", scale);
12877 rtx offset = NULL_RTX;
12881 /* Pull out the offset of a symbol; print any symbol itself. */
12882 if (GET_CODE (disp) == CONST
12883 && GET_CODE (XEXP (disp, 0)) == PLUS
12884 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12886 offset = XEXP (XEXP (disp, 0), 1);
12887 disp = gen_rtx_CONST (VOIDmode,
12888 XEXP (XEXP (disp, 0), 0));
12892 output_pic_addr_const (file, disp, 0);
12893 else if (GET_CODE (disp) == LABEL_REF)
12894 output_asm_label (disp);
12895 else if (CONST_INT_P (disp))
12898 output_addr_const (file, disp);
12904 print_reg (base, 0, file);
12907 if (INTVAL (offset) >= 0)
12909 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12913 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12920 print_reg (index, 0, file);
12922 fprintf (file, "*%d", scale);
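/* Illustrative (not exhaustive) output for the two dialects, assuming a
   base/index/disp/scale address: AT&T prints "-4(%ebp,%ecx,4)" and, for a
   RIP-relative symbol, "sym(%rip)"; Intel prints "DWORD PTR [ebp+ecx*4-4]"
   and "sym[rip]" respectively.  */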
12930 output_addr_const_extra (FILE *file, rtx x)
12934 if (GET_CODE (x) != UNSPEC)
12937 op = XVECEXP (x, 0, 0);
12938 switch (XINT (x, 1))
12940 case UNSPEC_GOTTPOFF:
12941 output_addr_const (file, op);
12942 /* FIXME: This might be @TPOFF in Sun ld. */
12943 fputs ("@gottpoff", file);
12946 output_addr_const (file, op);
12947 fputs ("@tpoff", file);
12949 case UNSPEC_NTPOFF:
12950 output_addr_const (file, op);
12952 fputs ("@tpoff", file);
12954 fputs ("@ntpoff", file);
12956 case UNSPEC_DTPOFF:
12957 output_addr_const (file, op);
12958 fputs ("@dtpoff", file);
12960 case UNSPEC_GOTNTPOFF:
12961 output_addr_const (file, op);
12963 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12964 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12966 fputs ("@gotntpoff", file);
12968 case UNSPEC_INDNTPOFF:
12969 output_addr_const (file, op);
12970 fputs ("@indntpoff", file);
12973 case UNSPEC_MACHOPIC_OFFSET:
12974 output_addr_const (file, op);
12976 machopic_output_function_base_name (file);
12987 /* Split one or more DImode RTL references into pairs of SImode
12988 references. The RTL can be REG, offsettable MEM, integer constant, or
12989 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12990 split and "num" is its length. lo_half and hi_half are output arrays
12991 that parallel "operands". */
12994 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12998 rtx op = operands[num];
13000 /* simplify_subreg refuses to split volatile memory addresses,
13001 but we still have to handle them. */
13004 lo_half[num] = adjust_address (op, SImode, 0);
13005 hi_half[num] = adjust_address (op, SImode, 4);
13009 lo_half[num] = simplify_gen_subreg (SImode, op,
13010 GET_MODE (op) == VOIDmode
13011 ? DImode : GET_MODE (op), 0);
13012 hi_half[num] = simplify_gen_subreg (SImode, op,
13013 GET_MODE (op) == VOIDmode
13014 ? DImode : GET_MODE (op), 4);
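/* A minimal usage sketch (hypothetical caller, not part of this file):
   lowering a DImode register-to-register move into two SImode moves.
   split_ti below is the TImode analogue.  */
#if 0
static void
example_split_dimode_move (rtx operands[2])
{
  rtx lo[2], hi[2];

  split_di (operands, 2, lo, hi);	/* Split dest and src.  */
  emit_move_insn (lo[0], lo[1]);	/* Move the low halves.  */
  emit_move_insn (hi[0], hi[1]);	/* Move the high halves.  */
}
#endif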
13018 /* Split one or more TImode RTL references into pairs of DImode
13019 references. The RTL can be REG, offsettable MEM, integer constant, or
13020 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13021 split and "num" is its length. lo_half and hi_half are output arrays
13022 that parallel "operands". */
13025 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13029 rtx op = operands[num];
13031 /* simplify_subreg refuses to split volatile memory addresses, but we
13032 still have to handle them. */
13035 lo_half[num] = adjust_address (op, DImode, 0);
13036 hi_half[num] = adjust_address (op, DImode, 8);
13040 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13041 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13046 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13047 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13048 is the expression of the binary operation. The output may either be
13049 emitted here, or returned to the caller, like all output_* functions.
13051 There is no guarantee that the operands are of the same mode, as they
13052 might be within FLOAT or FLOAT_EXTEND expressions. */
13054 #ifndef SYSV386_COMPAT
13055 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13056 wants to fix the assemblers because that causes incompatibility
13057 with gcc. No-one wants to fix gcc because that causes
13058 incompatibility with assemblers... You can use the option of
13059 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13060 #define SYSV386_COMPAT 1
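/* The issue, in brief (see also the comment in output_387_binary_op
   below): for fsub/fsubr and fdiv/fdivr with a destination other than
   %st(0), SVR4-style assemblers encode the reversed operation relative
   to what the Intel manual specifies, so the same mnemonic subtracts
   (or divides) in the opposite direction depending on the assembler.
   SYSV386_COMPAT selects which convention GCC emits for.  */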
13064 output_387_binary_op (rtx insn, rtx *operands)
13066 static char buf[40];
13069 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13071 #ifdef ENABLE_CHECKING
13072 /* Even if we do not want to check the inputs, this documents the input
13073 constraints, which helps in understanding the following code. */
13074 if (STACK_REG_P (operands[0])
13075 && ((REG_P (operands[1])
13076 && REGNO (operands[0]) == REGNO (operands[1])
13077 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13078 || (REG_P (operands[2])
13079 && REGNO (operands[0]) == REGNO (operands[2])
13080 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13081 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13084 gcc_assert (is_sse);
13087 switch (GET_CODE (operands[3]))
13090 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13091 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13099 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13100 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13108 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13109 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13117 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13118 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13126 gcc_unreachable ();
13133 strcpy (buf, ssep);
13134 if (GET_MODE (operands[0]) == SFmode)
13135 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13137 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13141 strcpy (buf, ssep + 1);
13142 if (GET_MODE (operands[0]) == SFmode)
13143 strcat (buf, "ss\t{%2, %0|%0, %2}");
13145 strcat (buf, "sd\t{%2, %0|%0, %2}");
13151 switch (GET_CODE (operands[3]))
13155 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13157 rtx temp = operands[2];
13158 operands[2] = operands[1];
13159 operands[1] = temp;
13162 /* Now we know operands[0] == operands[1]. */
13164 if (MEM_P (operands[2]))
13170 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13172 if (STACK_TOP_P (operands[0]))
13173 /* How is it that we are storing to a dead operand[2]?
13174 Well, presumably operands[1] is dead too. We can't
13175 store the result to st(0) as st(0) gets popped on this
13176 instruction. Instead store to operands[2] (which I
13177 think has to be st(1)). st(1) will be popped later.
13178 gcc <= 2.8.1 didn't have this check and generated
13179 assembly code that the Unixware assembler rejected. */
13180 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13182 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13186 if (STACK_TOP_P (operands[0]))
13187 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13189 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13194 if (MEM_P (operands[1]))
13200 if (MEM_P (operands[2]))
13206 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13209 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13210 derived assemblers, confusingly reverse the direction of
13211 the operation for fsub{r} and fdiv{r} when the
13212 destination register is not st(0). The Intel assembler
13213 doesn't have this brain damage. Read !SYSV386_COMPAT to
13214 figure out what the hardware really does. */
13215 if (STACK_TOP_P (operands[0]))
13216 p = "{p\t%0, %2|rp\t%2, %0}";
13218 p = "{rp\t%2, %0|p\t%0, %2}";
13220 if (STACK_TOP_P (operands[0]))
13221 /* As above for fmul/fadd, we can't store to st(0). */
13222 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13224 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13229 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13232 if (STACK_TOP_P (operands[0]))
13233 p = "{rp\t%0, %1|p\t%1, %0}";
13235 p = "{p\t%1, %0|rp\t%0, %1}";
13237 if (STACK_TOP_P (operands[0]))
13238 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13240 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13245 if (STACK_TOP_P (operands[0]))
13247 if (STACK_TOP_P (operands[1]))
13248 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13250 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13253 else if (STACK_TOP_P (operands[1]))
13256 p = "{\t%1, %0|r\t%0, %1}";
13258 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13264 p = "{r\t%2, %0|\t%0, %2}";
13266 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13272 gcc_unreachable ();
13279 /* Return needed mode for entity in optimize_mode_switching pass. */
13282 ix86_mode_needed (int entity, rtx insn)
13284 enum attr_i387_cw mode;
13286 /* The mode UNINITIALIZED is used to store the control word after a
13287 function call or ASM pattern. The mode ANY specifies that the function
13288 has no requirements on the control word and makes no changes in the
13289 bits we are interested in. */
13292 || (NONJUMP_INSN_P (insn)
13293 && (asm_noperands (PATTERN (insn)) >= 0
13294 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13295 return I387_CW_UNINITIALIZED;
13297 if (recog_memoized (insn) < 0)
13298 return I387_CW_ANY;
13300 mode = get_attr_i387_cw (insn);
13305 if (mode == I387_CW_TRUNC)
13310 if (mode == I387_CW_FLOOR)
13315 if (mode == I387_CW_CEIL)
13320 if (mode == I387_CW_MASK_PM)
13325 gcc_unreachable ();
13328 return I387_CW_ANY;
13331 /* Output code to initialize control word copies used by trunc?f?i and
13332 rounding patterns. CURRENT_MODE is set to the current control word,
13333 while NEW_MODE is set to the new control word. */
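/* For reference, the relevant x87 control word bits: the rounding
   control field is bits 10-11 (mask 0x0c00), with 00 = to nearest,
   01 = down (0x0400), 10 = up (0x0800), 11 = toward zero (0x0c00);
   bit 5 (0x0020) is the precision exception mask used for nearbyint.  */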
13336 emit_i387_cw_initialization (int mode)
13338 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13341 enum ix86_stack_slot slot;
13343 rtx reg = gen_reg_rtx (HImode);
13345 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13346 emit_move_insn (reg, copy_rtx (stored_mode));
13348 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13349 || optimize_function_for_size_p (cfun))
13353 case I387_CW_TRUNC:
13354 /* round toward zero (truncate) */
13355 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13356 slot = SLOT_CW_TRUNC;
13359 case I387_CW_FLOOR:
13360 /* round down toward -oo */
13361 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13362 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13363 slot = SLOT_CW_FLOOR;
13367 /* round up toward +oo */
13368 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13369 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13370 slot = SLOT_CW_CEIL;
13373 case I387_CW_MASK_PM:
13374 /* mask precision exception for nearbyint() */
13375 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13376 slot = SLOT_CW_MASK_PM;
13380 gcc_unreachable ();
13387 case I387_CW_TRUNC:
13388 /* round toward zero (truncate) */
13389 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13390 slot = SLOT_CW_TRUNC;
13393 case I387_CW_FLOOR:
13394 /* round down toward -oo */
13395 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13396 slot = SLOT_CW_FLOOR;
13400 /* round up toward +oo */
13401 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13402 slot = SLOT_CW_CEIL;
13405 case I387_CW_MASK_PM:
13406 /* mask precision exception for nearbyint() */
13407 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13408 slot = SLOT_CW_MASK_PM;
13412 gcc_unreachable ();
13416 gcc_assert (slot < MAX_386_STACK_LOCALS);
13418 new_mode = assign_386_stack_local (HImode, slot);
13419 emit_move_insn (new_mode, reg);
13422 /* Output code for INSN to convert a float to a signed int. OPERANDS
13423 are the insn operands. The output may be [HSD]Imode and the input
13424 operand may be [SDX]Fmode. */
13427 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13429 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13430 int dimode_p = GET_MODE (operands[0]) == DImode;
13431 int round_mode = get_attr_i387_cw (insn);
13433 /* Jump through a hoop or two for DImode, since the hardware has no
13434 non-popping instruction. We used to do this a different way, but
13435 that was somewhat fragile and broke with post-reload splitters. */
13436 if ((dimode_p || fisttp) && !stack_top_dies)
13437 output_asm_insn ("fld\t%y1", operands);
13439 gcc_assert (STACK_TOP_P (operands[1]));
13440 gcc_assert (MEM_P (operands[0]));
13441 gcc_assert (GET_MODE (operands[1]) != TFmode);
13444 output_asm_insn ("fisttp%Z0\t%0", operands);
13447 if (round_mode != I387_CW_ANY)
13448 output_asm_insn ("fldcw\t%3", operands);
13449 if (stack_top_dies || dimode_p)
13450 output_asm_insn ("fistp%Z0\t%0", operands);
13452 output_asm_insn ("fist%Z0\t%0", operands);
13453 if (round_mode != I387_CW_ANY)
13454 output_asm_insn ("fldcw\t%2", operands);
13460 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13461 have the values zero or one, indicates the ffreep insn's operand
13462 from the OPERANDS array. */
13464 static const char *
13465 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13467 if (TARGET_USE_FFREEP)
13468 #ifdef HAVE_AS_IX86_FFREEP
13469 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13472 static char retval[32];
13473 int regno = REGNO (operands[opno]);
13475 gcc_assert (FP_REGNO_P (regno));
13477 regno -= FIRST_STACK_REG;
13479 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
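/* Note: for regno i this emits e.g. ASM_SHORT "0xc0df" (i = 0); stored
   little-endian that is the byte sequence 0xdf 0xc0+i, the raw encoding
   of ffreep %st(i), for assemblers that lack the mnemonic.  */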
13484 return opno ? "fstp\t%y1" : "fstp\t%y0";
13488 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13489 should be used. UNORDERED_P is true when fucom should be used. */
13492 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13494 int stack_top_dies;
13495 rtx cmp_op0, cmp_op1;
13496 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13500 cmp_op0 = operands[0];
13501 cmp_op1 = operands[1];
13505 cmp_op0 = operands[1];
13506 cmp_op1 = operands[2];
13511 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13512 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13513 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13514 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13516 if (GET_MODE (operands[0]) == SFmode)
13518 return &ucomiss[TARGET_AVX ? 0 : 1];
13520 return &comiss[TARGET_AVX ? 0 : 1];
13523 return &ucomisd[TARGET_AVX ? 0 : 1];
13525 return &comisd[TARGET_AVX ? 0 : 1];
13528 gcc_assert (STACK_TOP_P (cmp_op0));
13530 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13532 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13534 if (stack_top_dies)
13536 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13537 return output_387_ffreep (operands, 1);
13540 return "ftst\n\tfnstsw\t%0";
13543 if (STACK_REG_P (cmp_op1)
13545 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13546 && REGNO (cmp_op1) != FIRST_STACK_REG)
13548 /* If the top of the 387 stack dies, and the other operand
13549 is also a stack register that dies, then this must be an
13550 `fcompp' float compare. */
13554 /* There is no double popping fcomi variant. Fortunately,
13555 eflags is immune from the fstp's cc clobbering. */
13557 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13559 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13560 return output_387_ffreep (operands, 0);
13565 return "fucompp\n\tfnstsw\t%0";
13567 return "fcompp\n\tfnstsw\t%0";
13572 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13574 static const char * const alt[16] =
13576 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13577 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13578 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13579 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13581 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13582 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13586 "fcomi\t{%y1, %0|%0, %y1}",
13587 "fcomip\t{%y1, %0|%0, %y1}",
13588 "fucomi\t{%y1, %0|%0, %y1}",
13589 "fucomip\t{%y1, %0|%0, %y1}",
13600 mask = eflags_p << 3;
13601 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13602 mask |= unordered_p << 1;
13603 mask |= stack_top_dies;
13605 gcc_assert (mask < 16);
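/* Example: fucomip with a dying stack top has eflags_p = 1,
   unordered_p = 1, stack_top_dies = 1 and a non-integer operand,
   giving mask = 8 + 2 + 1 = 11, which indeed selects "fucomip".  */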
13614 ix86_output_addr_vec_elt (FILE *file, int value)
13616 const char *directive = ASM_LONG;
13620 directive = ASM_QUAD;
13622 gcc_assert (!TARGET_64BIT);
13625 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13629 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13631 const char *directive = ASM_LONG;
13634 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13635 directive = ASM_QUAD;
13637 gcc_assert (!TARGET_64BIT);
13639 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13640 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13641 fprintf (file, "%s%s%d-%s%d\n",
13642 directive, LPREFIX, value, LPREFIX, rel);
13643 else if (HAVE_AS_GOTOFF_IN_DATA)
13644 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13646 else if (TARGET_MACHO)
13648 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13649 machopic_output_function_base_name (file);
13654 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13655 GOT_SYMBOL_NAME, LPREFIX, value);
13658 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate. */
13662 ix86_expand_clear (rtx dest)
13666 /* We play register width games, which are only valid after reload. */
13667 gcc_assert (reload_completed);
13669 /* Avoid HImode and its attendant prefix byte. */
13670 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13671 dest = gen_rtx_REG (SImode, REGNO (dest));
13672 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13674 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13675 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13677 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13678 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
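/* I.e. the "xor reg, reg" form (which clobbers the flags, hence the
   PARALLEL) is used unless TARGET_USE_MOV0 holds and the insn is not
   optimized for speed, in which case a plain "mov $0, reg" is emitted.  */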
13684 /* X is an unchanging MEM. If it is a constant pool reference, return
13685 the constant pool rtx, else NULL. */
13688 maybe_get_pool_constant (rtx x)
13690 x = ix86_delegitimize_address (XEXP (x, 0));
13692 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13693 return get_pool_constant (x);
13699 ix86_expand_move (enum machine_mode mode, rtx operands[])
13702 enum tls_model model;
13707 if (GET_CODE (op1) == SYMBOL_REF)
13709 model = SYMBOL_REF_TLS_MODEL (op1);
13712 op1 = legitimize_tls_address (op1, model, true);
13713 op1 = force_operand (op1, op0);
13717 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13718 && SYMBOL_REF_DLLIMPORT_P (op1))
13719 op1 = legitimize_dllimport_symbol (op1, false);
13721 else if (GET_CODE (op1) == CONST
13722 && GET_CODE (XEXP (op1, 0)) == PLUS
13723 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13725 rtx addend = XEXP (XEXP (op1, 0), 1);
13726 rtx symbol = XEXP (XEXP (op1, 0), 0);
13729 model = SYMBOL_REF_TLS_MODEL (symbol);
13731 tmp = legitimize_tls_address (symbol, model, true);
13732 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13733 && SYMBOL_REF_DLLIMPORT_P (symbol))
13734 tmp = legitimize_dllimport_symbol (symbol, true);
13738 tmp = force_operand (tmp, NULL);
13739 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13740 op0, 1, OPTAB_DIRECT);
13746 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13748 if (TARGET_MACHO && !TARGET_64BIT)
13753 rtx temp = ((reload_in_progress
13754 || ((op0 && REG_P (op0))
13756 ? op0 : gen_reg_rtx (Pmode));
13757 op1 = machopic_indirect_data_reference (op1, temp);
13758 op1 = machopic_legitimize_pic_address (op1, mode,
13759 temp == op1 ? 0 : temp);
13761 else if (MACHOPIC_INDIRECT)
13762 op1 = machopic_indirect_data_reference (op1, 0);
13770 op1 = force_reg (Pmode, op1);
13771 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13773 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13774 op1 = legitimize_pic_address (op1, reg);
13783 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13784 || !push_operand (op0, mode))
13786 op1 = force_reg (mode, op1);
13788 if (push_operand (op0, mode)
13789 && ! general_no_elim_operand (op1, mode))
13790 op1 = copy_to_mode_reg (mode, op1);
13792 /* Force large constants in 64bit compilation into registers
13793 to get them CSEed. */
13794 if (can_create_pseudo_p ()
13795 && (mode == DImode) && TARGET_64BIT
13796 && immediate_operand (op1, mode)
13797 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13798 && !register_operand (op0, mode)
13800 op1 = copy_to_mode_reg (mode, op1);
13802 if (can_create_pseudo_p ()
13803 && FLOAT_MODE_P (mode)
13804 && GET_CODE (op1) == CONST_DOUBLE)
13806 /* If we are loading a floating point constant to a register,
13807 force the value to memory now, since we'll get better code
13808 out of the back end. */
13810 op1 = validize_mem (force_const_mem (mode, op1));
13811 if (!register_operand (op0, mode))
13813 rtx temp = gen_reg_rtx (mode);
13814 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13815 emit_move_insn (op0, temp);
13821 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13825 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13827 rtx op0 = operands[0], op1 = operands[1];
13828 unsigned int align = GET_MODE_ALIGNMENT (mode);
13830 /* Force constants other than zero into memory. We do not know how
13831 the instructions used to build constants modify the upper 64 bits
13832 of the register; once we have that information, we may be able
13833 to handle some of them more efficiently. */
13834 if (can_create_pseudo_p ()
13835 && register_operand (op0, mode)
13836 && (CONSTANT_P (op1)
13837 || (GET_CODE (op1) == SUBREG
13838 && CONSTANT_P (SUBREG_REG (op1))))
13839 && !standard_sse_constant_p (op1))
13840 op1 = validize_mem (force_const_mem (mode, op1));
13842 /* We need to check memory alignment for SSE modes, since attributes
13843 can make operands unaligned. */
13844 if (can_create_pseudo_p ()
13845 && SSE_REG_MODE_P (mode)
13846 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13847 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13851 /* ix86_expand_vector_move_misalign() does not like constants ... */
13852 if (CONSTANT_P (op1)
13853 || (GET_CODE (op1) == SUBREG
13854 && CONSTANT_P (SUBREG_REG (op1))))
13855 op1 = validize_mem (force_const_mem (mode, op1));
13857 /* ... nor both arguments in memory. */
13858 if (!register_operand (op0, mode)
13859 && !register_operand (op1, mode))
13860 op1 = force_reg (mode, op1);
13862 tmp[0] = op0; tmp[1] = op1;
13863 ix86_expand_vector_move_misalign (mode, tmp);
13867 /* Make operand1 a register if it isn't already. */
13868 if (can_create_pseudo_p ()
13869 && !register_operand (op0, mode)
13870 && !register_operand (op1, mode))
13872 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13876 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13879 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13880 straight to ix86_expand_vector_move. */
13881 /* Code generation for scalar reg-reg moves of single and double precision data:
13882 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13886 if (x86_sse_partial_reg_dependency == true)
13891 Code generation for scalar loads of double precision data:
13892 if (x86_sse_split_regs == true)
13893 movlpd mem, reg (gas syntax)
13897 Code generation for unaligned packed loads of single precision data
13898 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13899 if (x86_sse_unaligned_move_optimal)
13902 if (x86_sse_partial_reg_dependency == true)
13914 Code generation for unaligned packed loads of double precision data
13915 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13916 if (x86_sse_unaligned_move_optimal)
13919 if (x86_sse_split_regs == true)
13932 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13941 switch (GET_MODE_CLASS (mode))
13943 case MODE_VECTOR_INT:
13945 switch (GET_MODE_SIZE (mode))
13948 /* If we're optimizing for size, movups is the smallest. */
13949 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13951 op0 = gen_lowpart (V4SFmode, op0);
13952 op1 = gen_lowpart (V4SFmode, op1);
13953 emit_insn (gen_avx_movups (op0, op1));
13956 op0 = gen_lowpart (V16QImode, op0);
13957 op1 = gen_lowpart (V16QImode, op1);
13958 emit_insn (gen_avx_movdqu (op0, op1));
13961 op0 = gen_lowpart (V32QImode, op0);
13962 op1 = gen_lowpart (V32QImode, op1);
13963 emit_insn (gen_avx_movdqu256 (op0, op1));
13966 gcc_unreachable ();
13969 case MODE_VECTOR_FLOAT:
13970 op0 = gen_lowpart (mode, op0);
13971 op1 = gen_lowpart (mode, op1);
13976 emit_insn (gen_avx_movups (op0, op1));
13979 emit_insn (gen_avx_movups256 (op0, op1));
13982 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13984 op0 = gen_lowpart (V4SFmode, op0);
13985 op1 = gen_lowpart (V4SFmode, op1);
13986 emit_insn (gen_avx_movups (op0, op1));
13989 emit_insn (gen_avx_movupd (op0, op1));
13992 emit_insn (gen_avx_movupd256 (op0, op1));
13995 gcc_unreachable ();
14000 gcc_unreachable ();
14008 /* If we're optimizing for size, movups is the smallest. */
14009 if (optimize_insn_for_size_p ()
14010 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14012 op0 = gen_lowpart (V4SFmode, op0);
14013 op1 = gen_lowpart (V4SFmode, op1);
14014 emit_insn (gen_sse_movups (op0, op1));
14018 /* ??? If we have typed data, then it would appear that using
14019 movdqu is the only way to get unaligned data loaded with integer registers. */
14021 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14023 op0 = gen_lowpart (V16QImode, op0);
14024 op1 = gen_lowpart (V16QImode, op1);
14025 emit_insn (gen_sse2_movdqu (op0, op1));
14029 if (TARGET_SSE2 && mode == V2DFmode)
14033 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14035 op0 = gen_lowpart (V2DFmode, op0);
14036 op1 = gen_lowpart (V2DFmode, op1);
14037 emit_insn (gen_sse2_movupd (op0, op1));
14041 /* When SSE registers are split into halves, we can avoid
14042 writing to the top half twice. */
14043 if (TARGET_SSE_SPLIT_REGS)
14045 emit_clobber (op0);
14050 /* ??? Not sure about the best option for the Intel chips.
14051 The following would seem to satisfy; the register is
14052 entirely cleared, breaking the dependency chain. We
14053 then store to the upper half, with a dependency depth
14054 of one. A rumor has it that Intel recommends two movsd
14055 followed by an unpacklpd, but this is unconfirmed. And
14056 given that the dependency depth of the unpacklpd would
14057 still be one, I'm not sure why this would be better. */
14058 zero = CONST0_RTX (V2DFmode);
14061 m = adjust_address (op1, DFmode, 0);
14062 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14063 m = adjust_address (op1, DFmode, 8);
14064 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14068 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14070 op0 = gen_lowpart (V4SFmode, op0);
14071 op1 = gen_lowpart (V4SFmode, op1);
14072 emit_insn (gen_sse_movups (op0, op1));
14076 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14077 emit_move_insn (op0, CONST0_RTX (mode));
14079 emit_clobber (op0);
14081 if (mode != V4SFmode)
14082 op0 = gen_lowpart (V4SFmode, op0);
14083 m = adjust_address (op1, V2SFmode, 0);
14084 emit_insn (gen_sse_loadlps (op0, op0, m));
14085 m = adjust_address (op1, V2SFmode, 8);
14086 emit_insn (gen_sse_loadhps (op0, op0, m));
14089 else if (MEM_P (op0))
14091 /* If we're optimizing for size, movups is the smallest. */
14092 if (optimize_insn_for_size_p ()
14093 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14095 op0 = gen_lowpart (V4SFmode, op0);
14096 op1 = gen_lowpart (V4SFmode, op1);
14097 emit_insn (gen_sse_movups (op0, op1));
14101 /* ??? Similar to above, only less clear because of quote
14102 typeless stores unquote. */
14103 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14104 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14106 op0 = gen_lowpart (V16QImode, op0);
14107 op1 = gen_lowpart (V16QImode, op1);
14108 emit_insn (gen_sse2_movdqu (op0, op1));
14112 if (TARGET_SSE2 && mode == V2DFmode)
14114 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14116 op0 = gen_lowpart (V2DFmode, op0);
14117 op1 = gen_lowpart (V2DFmode, op1);
14118 emit_insn (gen_sse2_movupd (op0, op1));
14122 m = adjust_address (op0, DFmode, 0);
14123 emit_insn (gen_sse2_storelpd (m, op1));
14124 m = adjust_address (op0, DFmode, 8);
14125 emit_insn (gen_sse2_storehpd (m, op1));
14130 if (mode != V4SFmode)
14131 op1 = gen_lowpart (V4SFmode, op1);
14133 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14135 op0 = gen_lowpart (V4SFmode, op0);
14136 emit_insn (gen_sse_movups (op0, op1));
14140 m = adjust_address (op0, V2SFmode, 0);
14141 emit_insn (gen_sse_storelps (m, op1));
14142 m = adjust_address (op0, V2SFmode, 8);
14143 emit_insn (gen_sse_storehps (m, op1));
14148 gcc_unreachable ();
14151 /* Expand a push in MODE. This is some mode for which we do not support
14152 proper push instructions, at least from the registers that we expect
14153 the value to live in. */
14156 ix86_expand_push (enum machine_mode mode, rtx x)
14160 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14161 GEN_INT (-GET_MODE_SIZE (mode)),
14162 stack_pointer_rtx, 1, OPTAB_DIRECT);
14163 if (tmp != stack_pointer_rtx)
14164 emit_move_insn (stack_pointer_rtx, tmp);
14166 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14168 /* When we push an operand onto the stack, it has to be aligned at least
14169 at the function argument boundary. However, since we don't have
14170 the argument type, we can't determine the actual argument boundary. */
14172 emit_move_insn (tmp, x);
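/* E.g. pushing a DFmode value in 32-bit mode becomes, in effect,
   "subl $8, %esp" followed by an 8-byte store to (%esp), since there
   is no push instruction covering that case directly.  */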
14175 /* Helper function of ix86_fixup_binary_operands to canonicalize
14176 operand order. Returns true if the operands should be swapped. */
14179 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14182 rtx dst = operands[0];
14183 rtx src1 = operands[1];
14184 rtx src2 = operands[2];
14186 /* If the operation is not commutative, we can't do anything. */
14187 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14190 /* Highest priority is that src1 should match dst. */
14191 if (rtx_equal_p (dst, src1))
14193 if (rtx_equal_p (dst, src2))
14196 /* Next highest priority is that immediate constants come second. */
14197 if (immediate_operand (src2, mode))
14199 if (immediate_operand (src1, mode))
14202 /* Lowest priority is that memory references should come second. */
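/* E.g. for "a = 5 + b" the immediate is moved to the second source so
   the expander sees "a = b + 5"; likewise a memory source is moved
   second so that src1 can be tied to the destination register.  */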
14212 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14213 destination to use for the operation. If different from the true
14214 destination in operands[0], a copy operation will be required. */
14217 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14220 rtx dst = operands[0];
14221 rtx src1 = operands[1];
14222 rtx src2 = operands[2];
14224 /* Canonicalize operand order. */
14225 if (ix86_swap_binary_operands_p (code, mode, operands))
14229 /* It is invalid to swap operands of different modes. */
14230 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14237 /* Both source operands cannot be in memory. */
14238 if (MEM_P (src1) && MEM_P (src2))
14240 /* Optimization: Only read from memory once. */
14241 if (rtx_equal_p (src1, src2))
14243 src2 = force_reg (mode, src2);
14247 src2 = force_reg (mode, src2);
14250 /* If the destination is memory, and we do not have matching source
14251 operands, do things in registers. */
14252 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14253 dst = gen_reg_rtx (mode);
14255 /* Source 1 cannot be a constant. */
14256 if (CONSTANT_P (src1))
14257 src1 = force_reg (mode, src1);
14259 /* Source 1 cannot be a non-matching memory. */
14260 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14261 src1 = force_reg (mode, src1);
14263 operands[1] = src1;
14264 operands[2] = src2;
14268 /* Similarly, but assume that the destination has already been
14269 set up properly. */
14272 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14273 enum machine_mode mode, rtx operands[])
14275 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14276 gcc_assert (dst == operands[0]);
14279 /* Attempt to expand a binary operator. Make the expansion closer to the
14280 actual machine, than just general_operand, which will allow 3 separate
14281 memory references (one output, two inputs) in a single insn. */
14284 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14287 rtx src1, src2, dst, op, clob;
14289 dst = ix86_fixup_binary_operands (code, mode, operands);
14290 src1 = operands[1];
14291 src2 = operands[2];
14293 /* Emit the instruction. */
14295 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14296 if (reload_in_progress)
14298 /* Reload doesn't know about the flags register, and doesn't know that
14299 it doesn't want to clobber it. We can only do this with PLUS. */
14300 gcc_assert (code == PLUS);
14305 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14306 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14309 /* Fix up the destination if needed. */
14310 if (dst != operands[0])
14311 emit_move_insn (operands[0], dst);
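/* A typical caller is an .md expander; roughly (a sketch, see i386.md
   for the real patterns):
     "ix86_expand_binary_operator (PLUS, <MODE>mode, operands); DONE;"  */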
14314 /* Return TRUE or FALSE depending on whether the binary operator meets the
14315 appropriate constraints. */
14318 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14321 rtx dst = operands[0];
14322 rtx src1 = operands[1];
14323 rtx src2 = operands[2];
14325 /* Both source operands cannot be in memory. */
14326 if (MEM_P (src1) && MEM_P (src2))
14329 /* Canonicalize operand order for commutative operators. */
14330 if (ix86_swap_binary_operands_p (code, mode, operands))
14337 /* If the destination is memory, we must have a matching source operand. */
14338 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14341 /* Source 1 cannot be a constant. */
14342 if (CONSTANT_P (src1))
14345 /* Source 1 cannot be a non-matching memory. */
14346 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14352 /* Attempt to expand a unary operator. Make the expansion closer to the
14353 actual machine, than just general_operand, which will allow 2 separate
14354 memory references (one output, one input) in a single insn. */
14357 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14360 int matching_memory;
14361 rtx src, dst, op, clob;
14366 /* If the destination is memory, and we do not have matching source
14367 operands, do things in registers. */
14368 matching_memory = 0;
14371 if (rtx_equal_p (dst, src))
14372 matching_memory = 1;
14374 dst = gen_reg_rtx (mode);
14377 /* When source operand is memory, destination must match. */
14378 if (MEM_P (src) && !matching_memory)
14379 src = force_reg (mode, src);
14381 /* Emit the instruction. */
14383 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14384 if (reload_in_progress || code == NOT)
14386 /* Reload doesn't know about the flags register, and doesn't know that
14387 it doesn't want to clobber it. */
14388 gcc_assert (code == NOT);
14393 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14394 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14397 /* Fix up the destination if needed. */
14398 if (dst != operands[0])
14399 emit_move_insn (operands[0], dst);
14402 #define LEA_SEARCH_THRESHOLD 12
14404 /* Search backward for a non-AGU definition of register number REGNO1
14405 or register number REGNO2 in INSN's basic block until we either
14406 1. pass LEA_SEARCH_THRESHOLD instructions, or
14407 2. reach the BB boundary, or
14408 3. reach an AGU definition.
14409 Returns the distance between the non-AGU definition point and INSN.
14410 If there is no definition point, returns -1. */
14413 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14416 basic_block bb = BLOCK_FOR_INSN (insn);
14419 enum attr_type insn_type;
14421 if (insn != BB_HEAD (bb))
14423 rtx prev = PREV_INSN (insn);
14424 while (prev && distance < LEA_SEARCH_THRESHOLD)
14426 if (NONDEBUG_INSN_P (prev))
14429 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14430 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14431 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14432 && (regno1 == DF_REF_REGNO (*def_rec)
14433 || regno2 == DF_REF_REGNO (*def_rec)))
14435 insn_type = get_attr_type (prev);
14436 if (insn_type != TYPE_LEA)
14440 if (prev == BB_HEAD (bb))
14442 prev = PREV_INSN (prev);
14446 if (distance < LEA_SEARCH_THRESHOLD)
14450 bool simple_loop = false;
14452 FOR_EACH_EDGE (e, ei, bb->preds)
14455 simple_loop = true;
14461 rtx prev = BB_END (bb);
14464 && distance < LEA_SEARCH_THRESHOLD)
14466 if (NONDEBUG_INSN_P (prev))
14469 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14470 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14471 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14472 && (regno1 == DF_REF_REGNO (*def_rec)
14473 || regno2 == DF_REF_REGNO (*def_rec)))
14475 insn_type = get_attr_type (prev);
14476 if (insn_type != TYPE_LEA)
14480 prev = PREV_INSN (prev);
14488 /* get_attr_type may modify recog data. We want to make sure
14489 that recog data is valid for instruction INSN, on which
14490 distance_non_agu_define is called. INSN is unchanged here. */
14491 extract_insn_cached (insn);
14495 /* Return the distance between INSN and the next insn that uses
14496 register number REGNO0 in a memory address. Return -1 if no such
14497 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14500 distance_agu_use (unsigned int regno0, rtx insn)
14502 basic_block bb = BLOCK_FOR_INSN (insn);
14507 if (insn != BB_END (bb))
14509 rtx next = NEXT_INSN (insn);
14510 while (next && distance < LEA_SEARCH_THRESHOLD)
14512 if (NONDEBUG_INSN_P (next))
14516 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14517 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14518 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14519 && regno0 == DF_REF_REGNO (*use_rec))
14521 /* Return DISTANCE if OP0 is used in memory
14522 address in NEXT. */
14526 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14527 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14528 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14529 && regno0 == DF_REF_REGNO (*def_rec))
14531 /* Return -1 if OP0 is set in NEXT. */
14535 if (next == BB_END (bb))
14537 next = NEXT_INSN (next);
14541 if (distance < LEA_SEARCH_THRESHOLD)
14545 bool simple_loop = false;
14547 FOR_EACH_EDGE (e, ei, bb->succs)
14550 simple_loop = true;
14556 rtx next = BB_HEAD (bb);
14559 && distance < LEA_SEARCH_THRESHOLD)
14561 if (NONDEBUG_INSN_P (next))
14565 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14566 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14567 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14568 && regno0 == DF_REF_REGNO (*use_rec))
14570 /* Return DISTANCE if OP0 is used in memory
14571 address in NEXT. */
14575 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14576 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14577 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14578 && regno0 == DF_REF_REGNO (*def_rec))
14580 /* Return -1 if OP0 is set in NEXT. */
14585 next = NEXT_INSN (next);
14593 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
14594 there is a dilemma of choosing between LEA and ADD.
14595 Negative value: ADD is preferred over LEA.
14597 Positive value: LEA is preferred over ADD. */
14598 #define IX86_LEA_PRIORITY 2
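/* E.g. with IX86_LEA_PRIORITY 2, a non-AGU definition at distance 3
   loses to an AGU use at distance 6 (3 + 2 < 6), so ADD is chosen; were
   the AGU use at distance 4 instead, LEA would win.  See
   ix86_lea_for_add_ok below.  */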
14600 /* Return true if it is ok to optimize an ADD operation to a LEA
14601 operation to avoid flag register consumption. For processors
14602 like ATOM, if the destination register of the LEA holds an actual
14603 address which will be used soon, LEA is better; otherwise ADD is better. */
14607 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14608 rtx insn, rtx operands[])
14610 unsigned int regno0 = true_regnum (operands[0]);
14611 unsigned int regno1 = true_regnum (operands[1]);
14612 unsigned int regno2;
14614 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14615 return regno0 != regno1;
14617 regno2 = true_regnum (operands[2]);
14619 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14620 if (regno0 != regno1 && regno0 != regno2)
14624 int dist_define, dist_use;
14625 dist_define = distance_non_agu_define (regno1, regno2, insn);
14626 if (dist_define <= 0)
14629 /* If this insn has both a backward non-AGU dependence and a forward
14630 AGU dependence, the one with the shorter distance takes effect. */
14631 dist_use = distance_agu_use (regno0, insn);
14633 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
14640 /* Return true if the destination reg of SET_BODY is the shift count of USE_BODY. */
14644 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14650 /* Retrieve destination of SET_BODY. */
14651 switch (GET_CODE (set_body))
14654 set_dest = SET_DEST (set_body);
14655 if (!set_dest || !REG_P (set_dest))
14659 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14660 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14668 /* Retrieve shift count of USE_BODY. */
14669 switch (GET_CODE (use_body))
14672 shift_rtx = XEXP (use_body, 1);
14675 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14676 if (ix86_dep_by_shift_count_body (set_body,
14677 XVECEXP (use_body, 0, i)))
14685 && (GET_CODE (shift_rtx) == ASHIFT
14686 || GET_CODE (shift_rtx) == LSHIFTRT
14687 || GET_CODE (shift_rtx) == ASHIFTRT
14688 || GET_CODE (shift_rtx) == ROTATE
14689 || GET_CODE (shift_rtx) == ROTATERT))
14691 rtx shift_count = XEXP (shift_rtx, 1);
14693 /* Return true if shift count is dest of SET_BODY. */
14694 if (REG_P (shift_count)
14695 && true_regnum (set_dest) == true_regnum (shift_count))
14702 /* Return true if the destination reg of SET_INSN is the shift count of USE_INSN. */
14706 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14708 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14709 PATTERN (use_insn));
14712 /* Return TRUE or FALSE depending on whether the unary operator meets the
14713 appropriate constraints. */
14716 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14717 enum machine_mode mode ATTRIBUTE_UNUSED,
14718 rtx operands[2] ATTRIBUTE_UNUSED)
14720 /* If one of the operands is memory, source and destination must match. */
14721 if ((MEM_P (operands[0])
14722 || MEM_P (operands[1]))
14723 && ! rtx_equal_p (operands[0], operands[1]))
14728 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14729 are ok, keeping in mind the possible movddup alternative. */
14732 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14734 if (MEM_P (operands[0]))
14735 return rtx_equal_p (operands[0], operands[1 + high]);
14736 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14737 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14741 /* Post-reload splitter for converting an SF or DFmode value in an
14742 SSE register into an unsigned SImode. */
14745 ix86_split_convert_uns_si_sse (rtx operands[])
14747 enum machine_mode vecmode;
14748 rtx value, large, zero_or_two31, input, two31, x;
14750 large = operands[1];
14751 zero_or_two31 = operands[2];
14752 input = operands[3];
14753 two31 = operands[4];
14754 vecmode = GET_MODE (large);
14755 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14757 /* Load up the value into the low element. We must ensure that the other
14758 elements are valid floats -- zero is the easiest such value. */
14761 if (vecmode == V4SFmode)
14762 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14764 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14768 input = gen_rtx_REG (vecmode, REGNO (input));
14769 emit_move_insn (value, CONST0_RTX (vecmode));
14770 if (vecmode == V4SFmode)
14771 emit_insn (gen_sse_movss (value, value, input));
14773 emit_insn (gen_sse2_movsd (value, value, input));
14776 emit_move_insn (large, two31);
14777 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14779 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14780 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14782 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14783 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14785 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14786 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14788 large = gen_rtx_REG (V4SImode, REGNO (large));
14789 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14791 x = gen_rtx_REG (V4SImode, REGNO (value));
14792 if (vecmode == V4SFmode)
14793 emit_insn (gen_sse2_cvttps2dq (x, value));
14795 emit_insn (gen_sse2_cvttpd2dq (x, value));
14798 emit_insn (gen_xorv4si3 (value, value, large));
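/* In scalar terms the sequence above computes, branch-free:
   (unsigned) x = x < 2^31 ? (int) x : ((int) (x - 2^31)) ^ 0x80000000,
   using the LE compare mask to select between subtracting 2^31 (or 0)
   before the signed conversion and xoring the sign bit back in after.  */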
14801 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14802 Expects the 64-bit DImode to be supplied in a pair of integral
14803 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14804 -mfpmath=sse, !optimize_size only. */
14807 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14809 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14810 rtx int_xmm, fp_xmm;
14811 rtx biases, exponents;
14814 int_xmm = gen_reg_rtx (V4SImode);
14815 if (TARGET_INTER_UNIT_MOVES)
14816 emit_insn (gen_movdi_to_sse (int_xmm, input));
14817 else if (TARGET_SSE_SPLIT_REGS)
14819 emit_clobber (int_xmm);
14820 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14824 x = gen_reg_rtx (V2DImode);
14825 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14826 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14829 x = gen_rtx_CONST_VECTOR (V4SImode,
14830 gen_rtvec (4, GEN_INT (0x43300000UL),
14831 GEN_INT (0x45300000UL),
14832 const0_rtx, const0_rtx));
14833 exponents = validize_mem (force_const_mem (V4SImode, x));
14835 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14836 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14838 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14839 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14840 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14841 (0x1.0p84 + double(fp_value_hi_xmm)).
14842 Note these exponents differ by 32. */
14844 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14846 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14847 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14848 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14849 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14850 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14851 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14852 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14853 biases = validize_mem (force_const_mem (V2DFmode, biases));
14854 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14856 /* Add the upper and lower DFmode values together. */
14858 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14861 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14862 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14863 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14866 ix86_expand_vector_extract (false, target, fp_xmm, 0);
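/* Numerically, with n = hi * 2^32 + lo, the bias trick computes exactly
   ((0x1.0p52 + (double) lo) - 0x1.0p52)
   + ((0x1.0p84 + (double) hi * 0x1.0p32) - 0x1.0p84)
   whose sum after the final add/hadd is (double) n, rounded once.  */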
14869 /* Not used, but eases macroization of patterns. */
14871 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14872 rtx input ATTRIBUTE_UNUSED)
14874 gcc_unreachable ();
14877 /* Convert an unsigned SImode value into a DFmode. Only currently used
14878 for SSE, but applicable anywhere. */
14881 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14883 REAL_VALUE_TYPE TWO31r;
14886 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14887 NULL, 1, OPTAB_DIRECT);
14889 fp = gen_reg_rtx (DFmode);
14890 emit_insn (gen_floatsidf2 (fp, x));
14892 real_ldexp (&TWO31r, &dconst1, 31);
14893 x = const_double_from_real_value (TWO31r, DFmode);
14895 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14897 emit_move_insn (target, x);
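/* I.e. (double) u is computed as (double) (int) (u - 2^31) + 2^31; the
   subtraction wraps into the signed range, the conversion is exact, and
   the final DFmode addition restores the unsigned value exactly.  */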
14900 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14901 32-bit mode; otherwise we have a direct convert instruction. */
14904 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14906 REAL_VALUE_TYPE TWO32r;
14907 rtx fp_lo, fp_hi, x;
14909 fp_lo = gen_reg_rtx (DFmode);
14910 fp_hi = gen_reg_rtx (DFmode);
14912 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14914 real_ldexp (&TWO32r, &dconst1, 32);
14915 x = const_double_from_real_value (TWO32r, DFmode);
14916 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14918 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14920 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14923 emit_move_insn (target, x);
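/* I.e. (double) n = (double) (int) (n >> 32) * 0x1.0p32
   + (double) (unsigned int) n, with only the final
   addition able to round.  */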
14926 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14927 For x86_32, -mfpmath=sse, !optimize_size only. */
14929 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14931 REAL_VALUE_TYPE ONE16r;
14932 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14934 real_ldexp (&ONE16r, &dconst1, 16);
14935 x = const_double_from_real_value (ONE16r, SFmode);
14936 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14937 NULL, 0, OPTAB_DIRECT);
14938 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14939 NULL, 0, OPTAB_DIRECT);
14940 fp_hi = gen_reg_rtx (SFmode);
14941 fp_lo = gen_reg_rtx (SFmode);
14942 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14943 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14944 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14946 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14948 if (!rtx_equal_p (target, fp_hi))
14949 emit_move_insn (target, fp_hi);
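/* I.e. (float) u is computed as
   (float) (u >> 16) * 0x1.0p16f + (float) (u & 0xffff);
   both 16-bit halves convert to SFmode exactly, so only the
   (exact) scaling multiply and the final addition are left to round.  */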
14952 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14953 then replicate the value for all elements of the vector register. */
14957 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14964 v = gen_rtvec (4, value, value, value, value);
14965 return gen_rtx_CONST_VECTOR (V4SImode, v);
14969 v = gen_rtvec (2, value, value);
14970 return gen_rtx_CONST_VECTOR (V2DImode, v);
14974 v = gen_rtvec (4, value, value, value, value);
14976 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14977 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14978 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14982 v = gen_rtvec (2, value, value);
14984 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14985 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14988 gcc_unreachable ();
14992 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14993 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14994 for an SSE register. If VECT is true, then replicate the mask for
14995 all elements of the vector register. If INVERT is true, then create
14996 a mask excluding the sign bit. */
14999 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15001 enum machine_mode vec_mode, imode;
15002 HOST_WIDE_INT hi, lo;
15007 /* Find the sign bit, sign extended to 2*HWI. */
15013 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15014 lo = 0x80000000, hi = lo < 0;
15020 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15021 if (HOST_BITS_PER_WIDE_INT >= 64)
15022 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15024 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15029 vec_mode = VOIDmode;
15030 if (HOST_BITS_PER_WIDE_INT >= 64)
15033 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15040 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15044 lo = ~lo, hi = ~hi;
15050 mask = immed_double_const (lo, hi, imode);
15052 vec = gen_rtvec (2, v, mask);
15053 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15054 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15061 gcc_unreachable ();
15065 lo = ~lo, hi = ~hi;
15067 /* Force this value into the low part of a fp vector constant. */
15068 mask = immed_double_const (lo, hi, imode);
15069 mask = gen_lowpart (mode, mask);
15071 if (vec_mode == VOIDmode)
15072 return force_reg (mode, mask);
15074 v = ix86_build_const_vector (mode, vect, mask);
15075 return force_reg (vec_mode, v);
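/* E.g. for DFmode with VECT and not INVERT this yields the V2DF constant
   { -0.0, -0.0 }, i.e. 0x8000000000000000 in each element; with INVERT
   it yields the complementary mask 0x7fffffffffffffff.  */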
15078 /* Generate code for floating point ABS or NEG. */
15081 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15084 rtx mask, set, use, clob, dst, src;
15085 bool use_sse = false;
15086 bool vector_mode = VECTOR_MODE_P (mode);
15087 enum machine_mode elt_mode = mode;
15091 elt_mode = GET_MODE_INNER (mode);
15094 else if (mode == TFmode)
15096 else if (TARGET_SSE_MATH)
15097 use_sse = SSE_FLOAT_MODE_P (mode);
15099 /* NEG and ABS performed with SSE use bitwise mask operations.
15100 Create the appropriate mask now. */
15102 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15111 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15112 set = gen_rtx_SET (VOIDmode, dst, set);
15117 set = gen_rtx_fmt_e (code, mode, src);
15118 set = gen_rtx_SET (VOIDmode, dst, set);
15121 use = gen_rtx_USE (VOIDmode, mask);
15122 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15123 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15124 gen_rtvec (3, set, use, clob)));
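/* Thus negation of, say, a V4SF value is emitted as an XOR with the
   {-0.0, -0.0, -0.0, -0.0} sign-bit mask, and ABS as an AND with the
   inverted mask, with no flags or x87 involvement.  */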
15131 /* Expand a copysign operation. Special case operand 0 being a constant. */
15134 ix86_expand_copysign (rtx operands[])
15136 enum machine_mode mode;
15137 rtx dest, op0, op1, mask, nmask;
15139 dest = operands[0];
15143 mode = GET_MODE (dest);
15145 if (GET_CODE (op0) == CONST_DOUBLE)
15147 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15149 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15150 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15152 if (mode == SFmode || mode == DFmode)
15154 enum machine_mode vmode;
15156 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15158 if (op0 == CONST0_RTX (mode))
15159 op0 = CONST0_RTX (vmode);
15162 rtx v = ix86_build_const_vector (mode, false, op0);
15164 op0 = force_reg (vmode, v);
15167 else if (op0 != CONST0_RTX (mode))
15168 op0 = force_reg (mode, op0);
15170 mask = ix86_build_signbit_mask (mode, 0, 0);
15172 if (mode == SFmode)
15173 copysign_insn = gen_copysignsf3_const;
15174 else if (mode == DFmode)
15175 copysign_insn = gen_copysigndf3_const;
15177 copysign_insn = gen_copysigntf3_const;
15179 emit_insn (copysign_insn (dest, op0, op1, mask));
15183 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15185 nmask = ix86_build_signbit_mask (mode, 0, 1);
15186 mask = ix86_build_signbit_mask (mode, 0, 0);
15188 if (mode == SFmode)
15189 copysign_insn = gen_copysignsf3_var;
15190 else if (mode == DFmode)
15191 copysign_insn = gen_copysigndf3_var;
15193 copysign_insn = gen_copysigntf3_var;
15195 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15199 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15200 be a constant, and so has already been expanded into a vector constant. */
15203 ix86_split_copysign_const (rtx operands[])
15205 enum machine_mode mode, vmode;
15206 rtx dest, op0, mask, x;
15208 dest = operands[0];
15210 mask = operands[3];
15212 mode = GET_MODE (dest);
15213 vmode = GET_MODE (mask);
15215 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15216 x = gen_rtx_AND (vmode, dest, mask);
15217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15219 if (op0 != CONST0_RTX (vmode))
15221 x = gen_rtx_IOR (vmode, dest, op0);
15222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15226 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15227 so we have to do two masks. */
15230 ix86_split_copysign_var (rtx operands[])
15232 enum machine_mode mode, vmode;
15233 rtx dest, scratch, op0, op1, mask, nmask, x;
15235 dest = operands[0];
15236 scratch = operands[1];
15239 nmask = operands[4];
15240 mask = operands[5];
15242 mode = GET_MODE (dest);
15243 vmode = GET_MODE (mask);
15245 if (rtx_equal_p (op0, op1))
15247 /* Shouldn't happen often (it's useless, obviously), but when it does
15248 we'd generate incorrect code if we continue below. */
15249 emit_move_insn (dest, op0);
15253 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15255 gcc_assert (REGNO (op1) == REGNO (scratch));
15257 x = gen_rtx_AND (vmode, scratch, mask);
15258 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15261 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15262 x = gen_rtx_NOT (vmode, dest);
15263 x = gen_rtx_AND (vmode, x, op0);
15264 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15268 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15270 x = gen_rtx_AND (vmode, scratch, mask);
15272 else /* alternative 2,4 */
15274 gcc_assert (REGNO (mask) == REGNO (scratch));
15275 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15276 x = gen_rtx_AND (vmode, scratch, op1);
15278 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15280 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15282 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15283 x = gen_rtx_AND (vmode, dest, nmask);
15285 else /* alternative 3,4 */
15287 gcc_assert (REGNO (nmask) == REGNO (dest));
15288 dest = nmask;
15289 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15290 x = gen_rtx_AND (vmode, dest, op0);
15292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15295 x = gen_rtx_IOR (vmode, dest, scratch);
15296 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15299 /* Return TRUE or FALSE depending on whether the first SET in INSN
15300 has source and destination with matching CC modes, and whether the
15301 CC mode is at least as constrained as REQ_MODE. */
15304 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15307 enum machine_mode set_mode;
15309 set = PATTERN (insn);
15310 if (GET_CODE (set) == PARALLEL)
15311 set = XVECEXP (set, 0, 0);
15312 gcc_assert (GET_CODE (set) == SET);
15313 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15315 set_mode = GET_MODE (SET_DEST (set));
15319 if (req_mode != CCNOmode
15320 && (req_mode != CCmode
15321 || XEXP (SET_SRC (set), 1) != const0_rtx))
15325 if (req_mode == CCGCmode)
15329 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15333 if (req_mode == CCZmode)
15344 gcc_unreachable ();
15347 return (GET_MODE (SET_SRC (set)) == set_mode);
15350 /* Generate insn patterns to do an integer compare of OPERANDS. */
15353 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15355 enum machine_mode cmpmode;
15358 cmpmode = SELECT_CC_MODE (code, op0, op1);
15359 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15361 /* This is very simple, but making the interface the same as in the
15362 FP case makes the rest of the code easier. */
15363 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15364 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15366 /* Return the test that should be put into the flags user, i.e.
15367 the bcc, scc, or cmov instruction. */
15368 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15371 /* Figure out whether to use ordered or unordered fp comparisons.
15372 Return the appropriate mode to use. */
15375 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15377 /* ??? In order to make all comparisons reversible, we do all comparisons
15378 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15379 all forms of trapping and nontrapping comparisons, we can make inequality
15380 comparisons trapping again, since it results in better code when using
15381 FCOM based compares. */
15382 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15386 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15388 enum machine_mode mode = GET_MODE (op0);
15390 if (SCALAR_FLOAT_MODE_P (mode))
15392 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15393 return ix86_fp_compare_mode (code);
15398 /* Only zero flag is needed. */
15399 case EQ: /* ZF=0 */
15400 case NE: /* ZF!=0 */
15402 /* Codes needing carry flag. */
15403 case GEU: /* CF=0 */
15404 case LTU: /* CF=1 */
15405 /* Detect overflow checks. They need just the carry flag. */
15406 if (GET_CODE (op0) == PLUS
15407 && rtx_equal_p (op1, XEXP (op0, 0)))
15411 case GTU: /* CF=0 & ZF=0 */
15412 case LEU: /* CF=1 | ZF=1 */
15413 /* Detect overflow checks. They need just the carry flag. */
15414 if (GET_CODE (op0) == MINUS
15415 && rtx_equal_p (op1, XEXP (op0, 0)))
15419 /* Codes possibly doable only with sign flag when
15420 comparing against zero. */
15421 case GE: /* SF=OF or SF=0 */
15422 case LT: /* SF<>OF or SF=1 */
15423 if (op1 == const0_rtx)
15426 /* For other cases Carry flag is not required. */
15428 /* Codes doable only with sign flag when comparing
15429 against zero, but we miss jump instruction for it
15430 so we need to use relational tests against overflow
15431 that thus needs to be zero. */
15432 case GT: /* ZF=0 & SF=OF */
15433 case LE: /* ZF=1 | SF<>OF */
15434 if (op1 == const0_rtx)
15438 /* The strcmp patterns do (use flags), and combine may ask us for a proper
15439 mode. */
15443 gcc_unreachable ();
15447 /* Return the fixed registers used for condition codes. */
15450 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15457 /* If two condition code modes are compatible, return a condition code
15458 mode which is compatible with both. Otherwise, return
15459 VOIDmode. */
15461 static enum machine_mode
15462 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15467 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15470 if ((m1 == CCGCmode && m2 == CCGOCmode)
15471 || (m1 == CCGOCmode && m2 == CCGCmode))
15477 gcc_unreachable ();
15507 /* These are only compatible with themselves, which we already
15508 know. */
15514 /* Return a comparison we can do that is equivalent to
15515 swap_condition (code), except possibly for orderedness.
15516 But, never change orderedness if TARGET_IEEE_FP, returning
15517 UNKNOWN in that case if necessary. */
15519 static enum rtx_code
15520 ix86_fp_swap_condition (enum rtx_code code)
15524 case GT: /* GTU - CF=0 & ZF=0 */
15525 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15526 case GE: /* GEU - CF=0 */
15527 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15528 case UNLT: /* LTU - CF=1 */
15529 return TARGET_IEEE_FP ? UNKNOWN : GT;
15530 case UNLE: /* LEU - CF=1 | ZF=1 */
15531 return TARGET_IEEE_FP ? UNKNOWN : GE;
15533 return swap_condition (code);
15537 /* Return cost of comparison CODE using the best strategy for performance.
15538 All following functions use the number of instructions as a cost metric.
15539 In the future this should be tweaked to compute bytes for optimize_size and
15540 take into account performance of various instructions on various CPUs. */
15543 ix86_fp_comparison_cost (enum rtx_code code)
15547 /* The cost of code using bit-twiddling on %ah. */
15564 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15568 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15571 gcc_unreachable ();
15574 switch (ix86_fp_comparison_strategy (code))
15576 case IX86_FPCMP_COMI:
15577 return arith_cost > 4 ? 3 : 2;
15578 case IX86_FPCMP_SAHF:
15579 return arith_cost > 4 ? 4 : 3;
15585 /* Return the strategy to use for a floating-point comparison. We assume
15586 that fcomi is always preferable where available, since that is also true
15587 when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15589 enum ix86_fpcmp_strategy
15590 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15592 /* Do fcomi/sahf based test when profitable. */
15595 return IX86_FPCMP_COMI;
15597 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15598 return IX86_FPCMP_SAHF;
15600 return IX86_FPCMP_ARITH;
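/* [Editor's note] Illustrative instruction sequences for the three
   strategies, in AT&T syntax (exact operands depend on context):

     IX86_FPCMP_COMI:   fcomip %st(1), %st          # sets ZF/PF/CF
     IX86_FPCMP_SAHF:   fnstsw %ax ; sahf           # C3/C2/C0 -> ZF/PF/CF
     IX86_FPCMP_ARITH:  fnstsw %ax ; testb $0x45, %ah

   In the status word copied into %ah, C0 is bit 0x01, C2 is 0x04 and
   C3 is 0x40, which is where constants such as 0x45 in the ARITH
   expansion below come from.  */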
15603 /* Swap, force into registers, or otherwise massage the two operands
15604 to a fp comparison. The operands are updated in place; the new
15605 comparison code is returned. */
15607 static enum rtx_code
15608 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15610 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15611 rtx op0 = *pop0, op1 = *pop1;
15612 enum machine_mode op_mode = GET_MODE (op0);
15613 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15615 /* All of the unordered compare instructions only work on registers.
15616 The same is true of the fcomi compare instructions. The XFmode
15617 compare instructions require registers except when comparing
15618 against zero or when converting operand 1 from fixed point to
15619 floating point. */
15621 if (!is_sse
15622 && (fpcmp_mode == CCFPUmode
15623 || (op_mode == XFmode
15624 && ! (standard_80387_constant_p (op0) == 1
15625 || standard_80387_constant_p (op1) == 1)
15626 && GET_CODE (op1) != FLOAT)
15627 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15629 op0 = force_reg (op_mode, op0);
15630 op1 = force_reg (op_mode, op1);
15634 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15635 things around if they appear profitable, otherwise force op0
15636 into a register. */
15638 if (standard_80387_constant_p (op0) == 0
15640 && ! (standard_80387_constant_p (op1) == 0
15643 enum rtx_code new_code = ix86_fp_swap_condition (code);
15644 if (new_code != UNKNOWN)
15647 tmp = op0, op0 = op1, op1 = tmp;
15653 op0 = force_reg (op_mode, op0);
15655 if (CONSTANT_P (op1))
15657 int tmp = standard_80387_constant_p (op1);
15658 if (tmp == 0)
15659 op1 = validize_mem (force_const_mem (op_mode, op1));
15663 op1 = force_reg (op_mode, op1);
15666 op1 = force_reg (op_mode, op1);
15670 /* Try to rearrange the comparison to make it cheaper. */
15671 if (ix86_fp_comparison_cost (code)
15672 > ix86_fp_comparison_cost (swap_condition (code))
15673 && (REG_P (op1) || can_create_pseudo_p ()))
15676 tmp = op0, op0 = op1, op1 = tmp;
15677 code = swap_condition (code);
15679 op0 = force_reg (op_mode, op0);
15687 /* Convert comparison codes we use to represent FP comparison to integer
15688 code that will result in a proper branch. Return UNKNOWN if no such code
15689 is available. */
15692 ix86_fp_compare_code_to_integer (enum rtx_code code)
15721 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15724 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15726 enum machine_mode fpcmp_mode, intcmp_mode;
15729 fpcmp_mode = ix86_fp_compare_mode (code);
15730 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15732 /* Do fcomi/sahf based test when profitable. */
15733 switch (ix86_fp_comparison_strategy (code))
15735 case IX86_FPCMP_COMI:
15736 intcmp_mode = fpcmp_mode;
15737 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15738 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15743 case IX86_FPCMP_SAHF:
15744 intcmp_mode = fpcmp_mode;
15745 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15746 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15750 scratch = gen_reg_rtx (HImode);
15751 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15752 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15755 case IX86_FPCMP_ARITH:
15756 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15757 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15758 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15760 scratch = gen_reg_rtx (HImode);
15761 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15763 /* In the unordered case, we have to check C2 for NaN's, which
15764 doesn't happen to work out to anything nice combination-wise.
15765 So do some bit twiddling on the value we've got in AH to come
15766 up with an appropriate set of condition codes. */
15768 intcmp_mode = CCNOmode;
15773 if (code == GT || !TARGET_IEEE_FP)
15775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15780 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15781 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15782 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15783 intcmp_mode = CCmode;
15789 if (code == LT && TARGET_IEEE_FP)
15791 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15792 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15793 intcmp_mode = CCmode;
15798 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15804 if (code == GE || !TARGET_IEEE_FP)
15806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15812 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15818 if (code == LE && TARGET_IEEE_FP)
15820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15821 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15822 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15823 intcmp_mode = CCmode;
15828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15834 if (code == EQ && TARGET_IEEE_FP)
15836 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15837 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15838 intcmp_mode = CCmode;
15843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15849 if (code == NE && TARGET_IEEE_FP)
15851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15852 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15873 gcc_unreachable ();
15881 /* Return the test that should be put into the flags user, i.e.
15882 the bcc, scc, or cmov instruction. */
15883 return gen_rtx_fmt_ee (code, VOIDmode,
15884 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15889 ix86_expand_compare (enum rtx_code code)
15892 op0 = ix86_compare_op0;
15893 op1 = ix86_compare_op1;
15895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15896 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15898 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15900 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15901 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15904 ret = ix86_expand_int_compare (code, op0, op1);
15910 ix86_expand_branch (enum rtx_code code, rtx label)
15914 switch (GET_MODE (ix86_compare_op0))
15923 tmp = ix86_expand_compare (code);
15924 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15925 gen_rtx_LABEL_REF (VOIDmode, label),
15927 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15934 /* Expand DImode branch into multiple compare+branch. */
15936 rtx lo[2], hi[2], label2;
15937 enum rtx_code code1, code2, code3;
15938 enum machine_mode submode;
15940 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15942 tmp = ix86_compare_op0;
15943 ix86_compare_op0 = ix86_compare_op1;
15944 ix86_compare_op1 = tmp;
15945 code = swap_condition (code);
15947 if (GET_MODE (ix86_compare_op0) == DImode)
15949 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15950 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15955 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15956 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15960 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15961 avoid two branches. This costs one extra insn, so disable when
15962 optimizing for size. */
15964 if ((code == EQ || code == NE)
15965 && (!optimize_insn_for_size_p ()
15966 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15971 if (hi[1] != const0_rtx)
15972 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15973 NULL_RTX, 0, OPTAB_WIDEN);
15976 if (lo[1] != const0_rtx)
15977 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15978 NULL_RTX, 0, OPTAB_WIDEN);
15980 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15981 NULL_RTX, 0, OPTAB_WIDEN);
15983 ix86_compare_op0 = tmp;
15984 ix86_compare_op1 = const0_rtx;
15985 ix86_expand_branch (code, label);
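/* [Editor's sketch, not part of GCC] The identity used above: two
   double-word values are equal iff the OR of the XORs of their halves
   is zero, so one compare against zero replaces two branches:

     static int eq_2word (unsigned lo0, unsigned hi0,
                          unsigned lo1, unsigned hi1)
     {
       return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
     }  */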
15989 /* Otherwise, if we are doing a less-than or greater-or-equal-than
15990 comparison, op1 is a constant and the low word is zero, then we can
15991 just examine the high word. Similarly for low word -1 and
15992 less-or-equal-than or greater-than. */
15994 if (CONST_INT_P (hi[1]))
15997 case LT: case LTU: case GE: case GEU:
15998 if (lo[1] == const0_rtx)
16000 ix86_compare_op0 = hi[0];
16001 ix86_compare_op1 = hi[1];
16002 ix86_expand_branch (code, label);
16006 case LE: case LEU: case GT: case GTU:
16007 if (lo[1] == constm1_rtx)
16009 ix86_compare_op0 = hi[0];
16010 ix86_compare_op1 = hi[1];
16011 ix86_expand_branch (code, label);
16019 /* Otherwise, we need two or three jumps. */
16021 label2 = gen_label_rtx ();
16023 code1 = code;
16024 code2 = swap_condition (code);
16025 code3 = unsigned_condition (code);
16029 case LT: case GT: case LTU: case GTU:
16032 case LE: code1 = LT; code2 = GT; break;
16033 case GE: code1 = GT; code2 = LT; break;
16034 case LEU: code1 = LTU; code2 = GTU; break;
16035 case GEU: code1 = GTU; code2 = LTU; break;
16037 case EQ: code1 = UNKNOWN; code2 = NE; break;
16038 case NE: code2 = UNKNOWN; break;
16041 gcc_unreachable ();
16046 * if (hi(a) < hi(b)) goto true;
16047 * if (hi(a) > hi(b)) goto false;
16048 * if (lo(a) < lo(b)) goto true;
16052 ix86_compare_op0 = hi[0];
16053 ix86_compare_op1 = hi[1];
16055 if (code1 != UNKNOWN)
16056 ix86_expand_branch (code1, label);
16057 if (code2 != UNKNOWN)
16058 ix86_expand_branch (code2, label2);
16060 ix86_compare_op0 = lo[0];
16061 ix86_compare_op1 = lo[1];
16062 ix86_expand_branch (code3, label);
16064 if (code2 != UNKNOWN)
16065 emit_label (label2);
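/* [Editor's sketch, not part of GCC] The jump scheme above is the
   textbook double-word comparison; e.g. for signed '<' it computes:

     static int lt_2word (int hi0, unsigned lo0, int hi1, unsigned lo1)
     {
       if (hi0 < hi1) return 1;   // code1 on the high words
       if (hi0 > hi1) return 0;   // code2, branching to label2
       return lo0 < lo1;          // code3: unsigned test on low words
     }  */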
16070 /* If we have already emitted a compare insn, go straight to simple.
16071 ix86_expand_compare won't emit anything if ix86_compare_emitted
16072 is non-NULL. */
16073 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
16078 /* Split branch based on floating point condition. */
16080 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16081 rtx target1, rtx target2, rtx tmp, rtx pushed)
16086 if (target2 != pc_rtx)
16089 code = reverse_condition_maybe_unordered (code);
16094 condition = ix86_expand_fp_compare (code, op1, op2,
16097 /* Remove pushed operand from stack. */
16099 ix86_free_from_memory (GET_MODE (pushed));
16101 i = emit_jump_insn (gen_rtx_SET
16103 gen_rtx_IF_THEN_ELSE (VOIDmode,
16104 condition, target1, target2)));
16105 if (split_branch_probability >= 0)
16106 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16110 ix86_expand_setcc (enum rtx_code code, rtx dest)
16114 gcc_assert (GET_MODE (dest) == QImode);
16116 ret = ix86_expand_compare (code);
16117 PUT_MODE (ret, QImode);
16118 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16121 /* Expand a comparison setting or clearing the carry flag. Return true
16122 when successful and set *POP for the operation. */
16124 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16126 enum machine_mode mode =
16127 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16129 /* Do not handle DImode compares that go through special path. */
16130 if (mode == (TARGET_64BIT ? TImode : DImode))
16133 if (SCALAR_FLOAT_MODE_P (mode))
16135 rtx compare_op, compare_seq;
16137 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16139 /* Shortcut: the following common codes never translate
16140 into carry flag compares. */
16141 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16142 || code == ORDERED || code == UNORDERED)
16145 /* These comparisons require the zero flag; swap operands so they won't. */
16146 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16147 && !TARGET_IEEE_FP)
16152 code = swap_condition (code);
16155 /* Try to expand the comparison and verify that we end up with
16156 a carry-flag-based comparison. This fails to be true only when
16157 we decide to expand the comparison using arithmetic, which is
16158 not a common scenario. */
16160 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16161 compare_seq = get_insns ();
16164 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16165 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16166 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16168 code = GET_CODE (compare_op);
16170 if (code != LTU && code != GEU)
16173 emit_insn (compare_seq);
16178 if (!INTEGRAL_MODE_P (mode))
16187 /* Convert a==0 into (unsigned)a<1. */
16190 if (op1 != const0_rtx)
16193 code = (code == EQ ? LTU : GEU);
16196 /* Convert a>b into b<a or a>=b+1. */
16199 if (CONST_INT_P (op1))
16201 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16202 /* Bail out on overflow. We still can swap operands but that
16203 would force loading of the constant into register. */
16204 if (op1 == const0_rtx
16205 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16207 code = (code == GTU ? GEU : LTU);
16214 code = (code == GTU ? LTU : GEU);
16218 /* Convert a>=0 into (unsigned)a<0x80000000. */
16221 if (mode == DImode || op1 != const0_rtx)
16223 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16224 code = (code == LT ? GEU : LTU);
16228 if (mode == DImode || op1 != constm1_rtx)
16230 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16231 code = (code == LE ? GEU : LTU);
16237 /* Swapping operands may cause a constant to appear as the first operand. */
16238 if (!nonimmediate_operand (op0, VOIDmode))
16240 if (!can_create_pseudo_p ())
16242 op0 = force_reg (mode, op0);
16244 ix86_compare_op0 = op0;
16245 ix86_compare_op1 = op1;
16246 *pop = ix86_expand_compare (code);
16247 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
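/* [Editor's sketch, not part of GCC] The rewrites performed above, all
   of which reduce to a compare whose result lives in the carry flag
   (shown for 32-bit operands; the GTU form assumes B is a constant
   that does not overflow when incremented):

     a == 0   <=>  (unsigned) a <  1
     a >u b   <=>  (unsigned) a >= b + 1
     a >= 0   <=>  (unsigned) a <  0x80000000
     a <  0   <=>  (unsigned) a >= 0x80000000  */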
16252 ix86_expand_int_movcc (rtx operands[])
16254 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16255 rtx compare_seq, compare_op;
16256 enum machine_mode mode = GET_MODE (operands[0]);
16257 bool sign_bit_compare_p = false;
16260 ix86_compare_op0 = XEXP (operands[1], 0);
16261 ix86_compare_op1 = XEXP (operands[1], 1);
16262 compare_op = ix86_expand_compare (code);
16263 compare_seq = get_insns ();
16266 compare_code = GET_CODE (compare_op);
16268 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
16269 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
16270 sign_bit_compare_p = true;
16272 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16273 HImode insns, we'd be swallowed in word prefix ops. */
16275 if ((mode != HImode || TARGET_FAST_PREFIX)
16276 && (mode != (TARGET_64BIT ? TImode : DImode))
16277 && CONST_INT_P (operands[2])
16278 && CONST_INT_P (operands[3]))
16280 rtx out = operands[0];
16281 HOST_WIDE_INT ct = INTVAL (operands[2]);
16282 HOST_WIDE_INT cf = INTVAL (operands[3]);
16283 HOST_WIDE_INT diff;
16286 /* Sign bit compares are better done using shifts than we do by using
16287 the sbb instruction. */
16288 if (sign_bit_compare_p
16289 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16290 ix86_compare_op1, &compare_op))
16292 /* Detect overlap between destination and compare sources. */
16295 if (!sign_bit_compare_p)
16298 bool fpcmp = false;
16300 compare_code = GET_CODE (compare_op);
16302 flags = XEXP (compare_op, 0);
16304 if (GET_MODE (flags) == CCFPmode
16305 || GET_MODE (flags) == CCFPUmode)
16309 = ix86_fp_compare_code_to_integer (compare_code);
16312 /* To simplify the rest of the code, restrict to the GEU case. */
16313 if (compare_code == LTU)
16315 HOST_WIDE_INT tmp = ct;
16318 compare_code = reverse_condition (compare_code);
16319 code = reverse_condition (code);
16324 PUT_CODE (compare_op,
16325 reverse_condition_maybe_unordered
16326 (GET_CODE (compare_op)));
16328 PUT_CODE (compare_op,
16329 reverse_condition (GET_CODE (compare_op)));
16333 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
16334 || reg_overlap_mentioned_p (out, ix86_compare_op1))
16335 tmp = gen_reg_rtx (mode);
16337 if (mode == DImode)
16338 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16339 else
16340 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16341 flags, compare_op));
16345 if (code == GT || code == GE)
16346 code = reverse_condition (code);
16349 HOST_WIDE_INT tmp = ct;
16354 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
16355 ix86_compare_op1, VOIDmode, 0, -1);
16368 tmp = expand_simple_binop (mode, PLUS,
16370 copy_rtx (tmp), 1, OPTAB_DIRECT);
16381 tmp = expand_simple_binop (mode, IOR,
16383 copy_rtx (tmp), 1, OPTAB_DIRECT);
16385 else if (diff == -1 && ct)
16395 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16397 tmp = expand_simple_binop (mode, PLUS,
16398 copy_rtx (tmp), GEN_INT (cf),
16399 copy_rtx (tmp), 1, OPTAB_DIRECT);
16407 * andl cf - ct, dest
16417 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16420 tmp = expand_simple_binop (mode, AND,
16422 gen_int_mode (cf - ct, mode),
16423 copy_rtx (tmp), 1, OPTAB_DIRECT);
16425 tmp = expand_simple_binop (mode, PLUS,
16426 copy_rtx (tmp), GEN_INT (ct),
16427 copy_rtx (tmp), 1, OPTAB_DIRECT);
16430 if (!rtx_equal_p (tmp, out))
16431 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16433 return 1; /* DONE */
16438 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16441 tmp = ct, ct = cf, cf = tmp;
16444 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16446 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16448 /* We may be reversing unordered compare to normal compare, that
16449 is not valid in general (we may convert non-trapping condition
16450 to trapping one), however on i386 we currently emit all
16451 comparisons unordered. */
16452 compare_code = reverse_condition_maybe_unordered (compare_code);
16453 code = reverse_condition_maybe_unordered (code);
16457 compare_code = reverse_condition (compare_code);
16458 code = reverse_condition (code);
16462 compare_code = UNKNOWN;
16463 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
16464 && CONST_INT_P (ix86_compare_op1))
16466 if (ix86_compare_op1 == const0_rtx
16467 && (code == LT || code == GE))
16468 compare_code = code;
16469 else if (ix86_compare_op1 == constm1_rtx)
16473 else if (code == GT)
16478 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16479 if (compare_code != UNKNOWN
16480 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
16481 && (cf == -1 || ct == -1))
16483 /* If lea code below could be used, only optimize
16484 if it results in a 2 insn sequence. */
16486 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16487 || diff == 3 || diff == 5 || diff == 9)
16488 || (compare_code == LT && ct == -1)
16489 || (compare_code == GE && cf == -1))
16492 * notl op1 (if necessary)
16500 code = reverse_condition (code);
16503 out = emit_store_flag (out, code, ix86_compare_op0,
16504 ix86_compare_op1, VOIDmode, 0, -1);
16506 out = expand_simple_binop (mode, IOR,
16508 out, 1, OPTAB_DIRECT);
16509 if (out != operands[0])
16510 emit_move_insn (operands[0], out);
16512 return 1; /* DONE */
16517 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16518 || diff == 3 || diff == 5 || diff == 9)
16519 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16521 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16527 * lea cf(dest*(ct-cf)),dest
16531 * This also catches the degenerate setcc-only case.
16537 out = emit_store_flag (out, code, ix86_compare_op0,
16538 ix86_compare_op1, VOIDmode, 0, 1);
16541 /* On x86_64 the lea instruction operates on Pmode, so we need
16542 to get arithmetics done in proper mode to match. */
16544 tmp = copy_rtx (out);
16548 out1 = copy_rtx (out);
16549 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16553 tmp = gen_rtx_PLUS (mode, tmp, out1);
16559 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16562 if (!rtx_equal_p (tmp, out))
16565 out = force_operand (tmp, copy_rtx (out));
16567 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16569 if (!rtx_equal_p (out, operands[0]))
16570 emit_move_insn (operands[0], copy_rtx (out));
16572 return 1; /* DONE */
16576 * General case: Jumpful:
16577 * xorl dest,dest cmpl op1, op2
16578 * cmpl op1, op2 movl ct, dest
16579 * setcc dest jcc 1f
16580 * decl dest movl cf, dest
16581 * andl (cf-ct),dest 1:
16584 * Size 20. Size 14.
16586 * This is reasonably steep, but branch mispredict costs are
16587 * high on modern cpus, so consider failing only if optimizing
16588 * for space.
16591 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16592 && BRANCH_COST (optimize_insn_for_speed_p (),
16597 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
16602 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16604 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16606 /* We may be reversing unordered compare to normal compare,
16607 that is not valid in general (we may convert non-trapping
16608 condition to trapping one), however on i386 we currently
16609 emit all comparisons unordered. */
16610 code = reverse_condition_maybe_unordered (code);
16614 code = reverse_condition (code);
16615 if (compare_code != UNKNOWN)
16616 compare_code = reverse_condition (compare_code);
16620 if (compare_code != UNKNOWN)
16622 /* notl op1 (if needed)
16627 For x < 0 (resp. x <= -1) there will be no notl,
16628 so if possible swap the constants to get rid of the
16629 complement.
16630 True/false will be -1/0 while code below (store flag
16631 followed by decrement) is 0/-1, so the constants need
16632 to be exchanged once more. */
16634 if (compare_code == GE || !cf)
16636 code = reverse_condition (code);
16641 HOST_WIDE_INT tmp = cf;
16646 out = emit_store_flag (out, code, ix86_compare_op0,
16647 ix86_compare_op1, VOIDmode, 0, -1);
16651 out = emit_store_flag (out, code, ix86_compare_op0,
16652 ix86_compare_op1, VOIDmode, 0, 1);
16654 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
16655 copy_rtx (out), 1, OPTAB_DIRECT);
16658 out = expand_simple_binop (mode, AND, copy_rtx (out),
16659 gen_int_mode (cf - ct, mode),
16660 copy_rtx (out), 1, OPTAB_DIRECT);
16662 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16663 copy_rtx (out), 1, OPTAB_DIRECT);
16664 if (!rtx_equal_p (out, operands[0]))
16665 emit_move_insn (operands[0], copy_rtx (out));
16667 return 1; /* DONE */
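/* [Editor's sketch, not part of GCC] The branchless selection computed
   by the xorl/setcc/decl/andl sequence described above:

     static int select (int cond, int ct, int cf)
     {
       int t = cond ? 1 : 0;   // setcc: 0 or 1
       t -= 1;                 // decl: 1 -> 0, 0 -> -1
       t &= cf - ct;           // 0 or cf - ct
       return t + ct;          // cond ? ct : cf
     }  */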
16671 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16673 /* Try a few more things with specific constants and a variable. */
16676 rtx var, orig_out, out, tmp;
16678 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16679 return 0; /* FAIL */
16681 /* If one of the two operands is an interesting constant, load a
16682 constant with the above and mask it in with a logical operation. */
16684 if (CONST_INT_P (operands[2]))
16687 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16688 operands[3] = constm1_rtx, op = and_optab;
16689 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16690 operands[3] = const0_rtx, op = ior_optab;
16692 return 0; /* FAIL */
16694 else if (CONST_INT_P (operands[3]))
16697 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16698 operands[2] = constm1_rtx, op = and_optab;
16699 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16700 operands[2] = const0_rtx, op = ior_optab;
16702 return 0; /* FAIL */
16705 return 0; /* FAIL */
16707 orig_out = operands[0];
16708 tmp = gen_reg_rtx (mode);
16711 /* Recurse to get the constant loaded. */
16712 if (ix86_expand_int_movcc (operands) == 0)
16713 return 0; /* FAIL */
16715 /* Mask in the interesting variable. */
16716 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16718 if (!rtx_equal_p (out, orig_out))
16719 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16721 return 1; /* DONE */
16725 * For comparison with above,
16735 if (! nonimmediate_operand (operands[2], mode))
16736 operands[2] = force_reg (mode, operands[2]);
16737 if (! nonimmediate_operand (operands[3], mode))
16738 operands[3] = force_reg (mode, operands[3]);
16740 if (! register_operand (operands[2], VOIDmode)
16742 || ! register_operand (operands[3], VOIDmode)))
16743 operands[2] = force_reg (mode, operands[2]);
16746 && ! register_operand (operands[3], VOIDmode))
16747 operands[3] = force_reg (mode, operands[3]);
16749 emit_insn (compare_seq);
16750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16751 gen_rtx_IF_THEN_ELSE (mode,
16752 compare_op, operands[2],
16755 return 1; /* DONE */
16758 /* Swap, force into registers, or otherwise massage the two operands
16759 to an SSE comparison with a mask result. Thus we differ a bit from
16760 ix86_prepare_fp_compare_args which expects to produce a flags result.
16762 The DEST operand exists to help determine whether to commute commutative
16763 operators. The POP0/POP1 operands are updated in place. The new
16764 comparison code is returned, or UNKNOWN if not implementable. */
16766 static enum rtx_code
16767 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16768 rtx *pop0, rtx *pop1)
16776 /* We have no LTGT as an operator. We could implement it with
16777 NE & ORDERED, but this requires an extra temporary. It's
16778 not clear that it's worth it. */
16785 /* These are supported directly. */
16792 /* For commutative operators, try to canonicalize the destination
16793 operand to be first in the comparison - this helps reload to
16794 avoid extra moves. */
16795 if (!dest || !rtx_equal_p (dest, *pop1))
16803 /* These are not supported directly. Swap the comparison operands
16804 to transform into something that is supported. */
16808 code = swap_condition (code);
16812 gcc_unreachable ();
16818 /* Detect conditional moves that exactly match min/max operational
16819 semantics. Note that this is IEEE safe, as long as we don't
16820 interchange the operands.
16822 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16823 and TRUE if the operation is successful and instructions are emitted. */
16826 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16827 rtx cmp_op1, rtx if_true, rtx if_false)
16829 enum machine_mode mode;
16835 else if (code == UNGE)
16838 if_true = if_false;
16844 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16846 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16851 mode = GET_MODE (dest);
16853 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16854 but MODE may be a vector mode and thus not appropriate. */
16855 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16857 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16860 if_true = force_reg (mode, if_true);
16861 v = gen_rtvec (2, if_true, if_false);
16862 tmp = gen_rtx_UNSPEC (mode, v, u);
16866 code = is_min ? SMIN : SMAX;
16867 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16870 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
16874 /* Expand an SSE vector comparison. Return the register with the result. */
16877 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16878 rtx op_true, rtx op_false)
16880 enum machine_mode mode = GET_MODE (dest);
16883 cmp_op0 = force_reg (mode, cmp_op0);
16884 if (!nonimmediate_operand (cmp_op1, mode))
16885 cmp_op1 = force_reg (mode, cmp_op1);
16888 || reg_overlap_mentioned_p (dest, op_true)
16889 || reg_overlap_mentioned_p (dest, op_false))
16890 dest = gen_reg_rtx (mode);
16892 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16899 operations. This is used for both scalar and vector conditional moves. */
16902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16904 enum machine_mode mode = GET_MODE (dest);
16907 if (op_false == CONST0_RTX (mode))
16909 op_true = force_reg (mode, op_true);
16910 x = gen_rtx_AND (mode, cmp, op_true);
16911 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16913 else if (op_true == CONST0_RTX (mode))
16915 op_false = force_reg (mode, op_false);
16916 x = gen_rtx_NOT (mode, cmp);
16917 x = gen_rtx_AND (mode, x, op_false);
16918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16920 else if (TARGET_XOP)
16922 rtx pcmov = gen_rtx_SET (mode, dest,
16923 gen_rtx_IF_THEN_ELSE (mode, cmp,
16930 op_true = force_reg (mode, op_true);
16931 op_false = force_reg (mode, op_false);
16933 t2 = gen_reg_rtx (mode);
16935 t3 = gen_reg_rtx (mode);
16939 x = gen_rtx_AND (mode, op_true, cmp);
16940 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16942 x = gen_rtx_NOT (mode, cmp);
16943 x = gen_rtx_AND (mode, x, op_false);
16944 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16946 x = gen_rtx_IOR (mode, t3, t2);
16947 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
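/* [Editor's sketch, not part of GCC] With CMP holding all-ones or
   all-zeros per element, the AND/ANDN/IOR trio above selects bitwise:

     static unsigned blend_lane (unsigned cmp, unsigned t, unsigned f)
     {
       return (cmp & t) | (~cmp & f);   // cmp ? t : f, per bit
     }  */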
16951 /* Expand a floating-point conditional move. Return true if successful. */
16954 ix86_expand_fp_movcc (rtx operands[])
16956 enum machine_mode mode = GET_MODE (operands[0]);
16957 enum rtx_code code = GET_CODE (operands[1]);
16958 rtx tmp, compare_op;
16960 ix86_compare_op0 = XEXP (operands[1], 0);
16961 ix86_compare_op1 = XEXP (operands[1], 1);
16962 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16964 enum machine_mode cmode;
16966 /* Since we have no cmove for SSE registers, don't force bad register
16967 allocation just to gain access to it. Deny movcc when the
16968 comparison mode doesn't match the move mode. */
16969 cmode = GET_MODE (ix86_compare_op0);
16970 if (cmode == VOIDmode)
16971 cmode = GET_MODE (ix86_compare_op1);
16975 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16977 &ix86_compare_op1);
16978 if (code == UNKNOWN)
16981 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16982 ix86_compare_op1, operands[2],
16986 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16987 ix86_compare_op1, operands[2], operands[3]);
16988 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16992 /* The floating point conditional move instructions don't directly
16993 support conditions resulting from a signed integer comparison. */
16995 compare_op = ix86_expand_compare (code);
16996 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16998 tmp = gen_reg_rtx (QImode);
16999 ix86_expand_setcc (code, tmp);
17001 ix86_compare_op0 = tmp;
17002 ix86_compare_op1 = const0_rtx;
17003 compare_op = ix86_expand_compare (code);
17006 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17007 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17008 operands[2], operands[3])));
17013 /* Expand a floating-point vector conditional move; a vcond operation
17014 rather than a movcc operation. */
17017 ix86_expand_fp_vcond (rtx operands[])
17019 enum rtx_code code = GET_CODE (operands[3]);
17022 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17023 &operands[4], &operands[5]);
17024 if (code == UNKNOWN)
17027 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17028 operands[5], operands[1], operands[2]))
17031 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17032 operands[1], operands[2]);
17033 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17037 /* Expand a signed/unsigned integral vector conditional move. */
17040 ix86_expand_int_vcond (rtx operands[])
17042 enum machine_mode mode = GET_MODE (operands[0]);
17043 enum rtx_code code = GET_CODE (operands[3]);
17044 bool negate = false;
17047 cop0 = operands[4];
17048 cop1 = operands[5];
17050 /* XOP supports all of the comparisons on all vector int types. */
17053 /* Canonicalize the comparison to EQ, GT, GTU. */
17064 code = reverse_condition (code);
17070 code = reverse_condition (code);
17076 code = swap_condition (code);
17077 x = cop0, cop0 = cop1, cop1 = x;
17081 gcc_unreachable ();
17084 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17085 if (mode == V2DImode)
17090 /* SSE4.1 supports EQ. */
17091 if (!TARGET_SSE4_1)
17097 /* SSE4.2 supports GT/GTU. */
17098 if (!TARGET_SSE4_2)
17103 gcc_unreachable ();
17107 /* Unsigned parallel compare is not supported by the hardware.
17108 Play some tricks to turn this into a signed comparison
17109 against 0. */
17112 cop0 = force_reg (mode, cop0);
17120 rtx (*gen_sub3) (rtx, rtx, rtx);
17122 /* Subtract (-(INT MAX) - 1) from both operands to make
17123 them signed. */
17124 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17126 gen_sub3 = (mode == V4SImode
17127 ? gen_subv4si3 : gen_subv2di3);
17128 t1 = gen_reg_rtx (mode);
17129 emit_insn (gen_sub3 (t1, cop0, mask));
17131 t2 = gen_reg_rtx (mode);
17132 emit_insn (gen_sub3 (t2, cop1, mask));
17142 /* Perform a parallel unsigned saturating subtraction. */
17143 x = gen_reg_rtx (mode);
17144 emit_insn (gen_rtx_SET (VOIDmode, x,
17145 gen_rtx_US_MINUS (mode, cop0, cop1)));
17148 cop1 = CONST0_RTX (mode);
17154 gcc_unreachable ();
17159 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17160 operands[1+negate], operands[2-negate]);
17162 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17163 operands[2-negate]);
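/* [Editor's sketch, not part of GCC] The two unsigned-compare rewrites
   used above, for one 32-bit element.  Subtracting 0x80000000 is the
   same as XORing it in (only the top bit can change), so:

     a <u b   <=>   (int) (a ^ 0x80000000) < (int) (b ^ 0x80000000)

   and with unsigned saturating subtraction a -us b = MAX (a - b, 0):

     a >u b   <=>   (a -us b) != 0.  */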
17167 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17168 true if we should do zero extension, else sign extension. HIGH_P is
17169 true if we want the N/2 high elements, else the low elements. */
17172 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17174 enum machine_mode imode = GET_MODE (operands[1]);
17175 rtx (*unpack)(rtx, rtx, rtx);
17182 unpack = gen_vec_interleave_highv16qi;
17184 unpack = gen_vec_interleave_lowv16qi;
17188 unpack = gen_vec_interleave_highv8hi;
17190 unpack = gen_vec_interleave_lowv8hi;
17194 unpack = gen_vec_interleave_highv4si;
17196 unpack = gen_vec_interleave_lowv4si;
17199 gcc_unreachable ();
17202 dest = gen_lowpart (imode, operands[0]);
17205 se = force_reg (imode, CONST0_RTX (imode));
17207 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17208 operands[1], pc_rtx, pc_rtx);
17210 emit_insn (unpack (dest, operands[1], se));
17213 /* This function performs the same task as ix86_expand_sse_unpack,
17214 but with SSE4.1 instructions. */
17217 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17219 enum machine_mode imode = GET_MODE (operands[1]);
17220 rtx (*unpack)(rtx, rtx);
17227 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17229 unpack = gen_sse4_1_extendv8qiv8hi2;
17233 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17235 unpack = gen_sse4_1_extendv4hiv4si2;
17239 unpack = gen_sse4_1_zero_extendv2siv2di2;
17241 unpack = gen_sse4_1_extendv2siv2di2;
17244 gcc_unreachable ();
17247 dest = operands[0];
17250 /* Shift higher 8 bytes to lower 8 bytes. */
17251 src = gen_reg_rtx (imode);
17252 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17253 gen_lowpart (V1TImode, operands[1]),
17259 emit_insn (unpack (dest, src));
17262 /* Expand conditional increment or decrement using adc/sbb instructions.
17263 The default case using setcc followed by the conditional move can be
17264 done by generic code. */
17266 ix86_expand_int_addcc (rtx operands[])
17268 enum rtx_code code = GET_CODE (operands[1]);
17270 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17272 rtx val = const0_rtx;
17273 bool fpcmp = false;
17274 enum machine_mode mode;
17276 ix86_compare_op0 = XEXP (operands[1], 0);
17277 ix86_compare_op1 = XEXP (operands[1], 1);
17278 if (operands[3] != const1_rtx
17279 && operands[3] != constm1_rtx)
17281 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
17282 ix86_compare_op1, &compare_op))
17284 code = GET_CODE (compare_op);
17286 flags = XEXP (compare_op, 0);
17288 if (GET_MODE (flags) == CCFPmode
17289 || GET_MODE (flags) == CCFPUmode)
17292 code = ix86_fp_compare_code_to_integer (code);
17299 PUT_CODE (compare_op,
17300 reverse_condition_maybe_unordered
17301 (GET_CODE (compare_op)));
17303 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17306 mode = GET_MODE (operands[0]);
17308 /* Construct either adc or sbb insn. */
17309 if ((code == LTU) == (operands[3] == constm1_rtx))
17314 insn = gen_subqi3_carry;
17317 insn = gen_subhi3_carry;
17320 insn = gen_subsi3_carry;
17323 insn = gen_subdi3_carry;
17326 gcc_unreachable ();
17334 insn = gen_addqi3_carry;
17337 insn = gen_addhi3_carry;
17340 insn = gen_addsi3_carry;
17343 insn = gen_adddi3_carry;
17346 gcc_unreachable ();
17349 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17351 return 1; /* DONE */
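/* [Editor's sketch, not part of GCC] What the carry forms compute.
   After a compare leaves CF = (a <u b), the adc/sbb patterns give:

     dest = op2 + (a <u b);   // add*3_carry, third operand +1
     dest = op2 - (a <u b);   // sub*3_carry, third operand -1

   which is why only const1_rtx/constm1_rtx are accepted above.  */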
17355 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17356 works for floating point parameters and non-offsettable memories.
17357 For pushes, it returns just stack offsets; the values will be saved
17358 in the right order. Maximally four parts are generated. */
17361 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17366 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17368 size = (GET_MODE_SIZE (mode) + 4) / 8;
17370 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17371 gcc_assert (size >= 2 && size <= 4);
17373 /* Optimize constant pool reference to immediates. This is used by fp
17374 moves, which force all constants to memory to allow combining. */
17375 if (MEM_P (operand) && MEM_READONLY_P (operand))
17377 rtx tmp = maybe_get_pool_constant (operand);
17382 if (MEM_P (operand) && !offsettable_memref_p (operand))
17384 /* The only non-offsettable memories we handle are pushes. */
17385 int ok = push_operand (operand, VOIDmode);
17389 operand = copy_rtx (operand);
17390 PUT_MODE (operand, Pmode);
17391 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17395 if (GET_CODE (operand) == CONST_VECTOR)
17397 enum machine_mode imode = int_mode_for_mode (mode);
17398 /* Caution: if we looked through a constant pool memory above,
17399 the operand may actually have a different mode now. That's
17400 ok, since we want to pun this all the way back to an integer. */
17401 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17402 gcc_assert (operand != NULL);
17408 if (mode == DImode)
17409 split_di (&operand, 1, &parts[0], &parts[1]);
17414 if (REG_P (operand))
17416 gcc_assert (reload_completed);
17417 for (i = 0; i < size; i++)
17418 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17420 else if (offsettable_memref_p (operand))
17422 operand = adjust_address (operand, SImode, 0);
17423 parts[0] = operand;
17424 for (i = 1; i < size; i++)
17425 parts[i] = adjust_address (operand, SImode, 4 * i);
17427 else if (GET_CODE (operand) == CONST_DOUBLE)
17432 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17436 real_to_target (l, &r, mode);
17437 parts[3] = gen_int_mode (l[3], SImode);
17438 parts[2] = gen_int_mode (l[2], SImode);
17441 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17442 parts[2] = gen_int_mode (l[2], SImode);
17445 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17448 gcc_unreachable ();
17450 parts[1] = gen_int_mode (l[1], SImode);
17451 parts[0] = gen_int_mode (l[0], SImode);
17454 gcc_unreachable ();
17459 if (mode == TImode)
17460 split_ti (&operand, 1, &parts[0], &parts[1]);
17461 if (mode == XFmode || mode == TFmode)
17463 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17464 if (REG_P (operand))
17466 gcc_assert (reload_completed);
17467 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17468 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17470 else if (offsettable_memref_p (operand))
17472 operand = adjust_address (operand, DImode, 0);
17473 parts[0] = operand;
17474 parts[1] = adjust_address (operand, upper_mode, 8);
17476 else if (GET_CODE (operand) == CONST_DOUBLE)
17481 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17482 real_to_target (l, &r, mode);
17484 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17485 if (HOST_BITS_PER_WIDE_INT >= 64)
17488 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17489 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17492 parts[0] = immed_double_const (l[0], l[1], DImode);
17494 if (upper_mode == SImode)
17495 parts[1] = gen_int_mode (l[2], SImode);
17496 else if (HOST_BITS_PER_WIDE_INT >= 64)
17499 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17500 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17503 parts[1] = immed_double_const (l[2], l[3], DImode);
17506 gcc_unreachable ();
17513 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17514 Return false when normal moves are needed; true when all required
17515 insns have been emitted. Operands 2-4 contain the input values
17516 in the correct order; operands 5-7 contain the output values. */
17519 ix86_split_long_move (rtx operands[])
17524 int collisions = 0;
17525 enum machine_mode mode = GET_MODE (operands[0]);
17526 bool collisionparts[4];
17528 /* The DFmode expanders may ask us to move double.
17529 For a 64-bit target this is a single move. By hiding the fact
17530 here we simplify i386.md splitters. */
17531 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17533 /* Optimize constant pool reference to immediates. This is used by
17534 fp moves, that force all constants to memory to allow combining. */
17536 if (MEM_P (operands[1])
17537 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17538 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17539 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17540 if (push_operand (operands[0], VOIDmode))
17542 operands[0] = copy_rtx (operands[0]);
17543 PUT_MODE (operands[0], Pmode);
17546 operands[0] = gen_lowpart (DImode, operands[0]);
17547 operands[1] = gen_lowpart (DImode, operands[1]);
17548 emit_move_insn (operands[0], operands[1]);
17552 /* The only non-offsettable memory we handle is push. */
17553 if (push_operand (operands[0], VOIDmode))
17556 gcc_assert (!MEM_P (operands[0])
17557 || offsettable_memref_p (operands[0]));
17559 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17560 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17562 /* When emitting a push, take care of source operands on the stack. */
17563 if (push && MEM_P (operands[1])
17564 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17566 rtx src_base = XEXP (part[1][nparts - 1], 0);
17568 /* Compensate for the stack decrement by 4. */
17569 if (!TARGET_64BIT && nparts == 3
17570 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17571 src_base = plus_constant (src_base, 4);
17573 /* src_base refers to the stack pointer and is
17574 automatically decreased by emitted push. */
17575 for (i = 0; i < nparts; i++)
17576 part[1][i] = change_address (part[1][i],
17577 GET_MODE (part[1][i]), src_base);
17580 /* We need to do the copy in the right order in case an address register
17581 of the source overlaps the destination. */
17582 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17586 for (i = 0; i < nparts; i++)
17589 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17590 if (collisionparts[i])
17594 /* Collision in the middle part can be handled by reordering. */
17595 if (collisions == 1 && nparts == 3 && collisionparts [1])
17597 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17598 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17600 else if (collisions == 1
17602 && (collisionparts [1] || collisionparts [2]))
17604 if (collisionparts [1])
17606 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17607 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17611 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17612 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17616 /* If there are more collisions, we can't handle it by reordering.
17617 Do an lea to the last part and use only one colliding move. */
17618 else if (collisions > 1)
17624 base = part[0][nparts - 1];
17626 /* Handle the case when the last part isn't valid for lea.
17627 Happens in 64-bit mode storing the 12-byte XFmode. */
17628 if (GET_MODE (base) != Pmode)
17629 base = gen_rtx_REG (Pmode, REGNO (base));
17631 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17632 part[1][0] = replace_equiv_address (part[1][0], base);
17633 for (i = 1; i < nparts; i++)
17635 tmp = plus_constant (base, UNITS_PER_WORD * i);
17636 part[1][i] = replace_equiv_address (part[1][i], tmp);
17647 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17648 emit_insn (gen_addsi3 (stack_pointer_rtx,
17649 stack_pointer_rtx, GEN_INT (-4)));
17650 emit_move_insn (part[0][2], part[1][2]);
17652 else if (nparts == 4)
17654 emit_move_insn (part[0][3], part[1][3]);
17655 emit_move_insn (part[0][2], part[1][2]);
17660 /* In 64bit mode we don't have 32bit push available. In case this is
17661 a register, it is OK - we will just use the larger counterpart. We also
17662 retype memory - this comes from an attempt to avoid the REX prefix on
17663 moving of the second half of a TFmode value. */
17664 if (GET_MODE (part[1][1]) == SImode)
17666 switch (GET_CODE (part[1][1]))
17669 part[1][1] = adjust_address (part[1][1], DImode, 0);
17673 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17677 gcc_unreachable ();
17680 if (GET_MODE (part[1][0]) == SImode)
17681 part[1][0] = part[1][1];
17684 emit_move_insn (part[0][1], part[1][1]);
17685 emit_move_insn (part[0][0], part[1][0]);
17689 /* Choose the correct order so as not to overwrite the source before it is copied. */
17690 if ((REG_P (part[0][0])
17691 && REG_P (part[1][1])
17692 && (REGNO (part[0][0]) == REGNO (part[1][1])
17694 && REGNO (part[0][0]) == REGNO (part[1][2]))
17696 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17698 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17700 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17702 operands[2 + i] = part[0][j];
17703 operands[6 + i] = part[1][j];
17708 for (i = 0; i < nparts; i++)
17710 operands[2 + i] = part[0][i];
17711 operands[6 + i] = part[1][i];
17715 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17716 if (optimize_insn_for_size_p ())
17718 for (j = 0; j < nparts - 1; j++)
17719 if (CONST_INT_P (operands[6 + j])
17720 && operands[6 + j] != const0_rtx
17721 && REG_P (operands[2 + j]))
17722 for (i = j; i < nparts - 1; i++)
17723 if (CONST_INT_P (operands[7 + i])
17724 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17725 operands[7 + i] = operands[2 + j];
17728 for (i = 0; i < nparts; i++)
17729 emit_move_insn (operands[2 + i], operands[6 + i]);
17734 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17735 left shift by a constant, either using a single shift or
17736 a sequence of add instructions. */
17739 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17743 emit_insn ((mode == DImode
17744 ? gen_addsi3
17745 : gen_adddi3) (operand, operand, operand));
17747 else if (!optimize_insn_for_size_p ()
17748 && count * ix86_cost->add <= ix86_cost->shift_const)
17751 for (i = 0; i < count; i++)
17753 emit_insn ((mode == DImode
17754 ? gen_addsi3
17755 : gen_adddi3) (operand, operand, operand));
17759 emit_insn ((mode == DImode
17760 ? gen_ashlsi3
17761 : gen_ashldi3) (operand, operand, GEN_INT (count)));
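/* [Editor's sketch, not part of GCC] The add-based shift above relies
   on x + x == x << 1, repeated COUNT times when that is cheaper than
   one shift-by-constant (count * add-cost <= shift-const-cost):

     static unsigned shl_by_adds (unsigned x, int count)
     {
       while (count-- > 0)
         x += x;               // one addition per shifted bit
       return x;
     }  */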
17765 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17767 rtx low[2], high[2];
17769 const int single_width = mode == DImode ? 32 : 64;
17771 if (CONST_INT_P (operands[2]))
17773 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17774 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17776 if (count >= single_width)
17778 emit_move_insn (high[0], low[1]);
17779 emit_move_insn (low[0], const0_rtx);
17781 if (count > single_width)
17782 ix86_expand_ashl_const (high[0], count - single_width, mode);
17786 if (!rtx_equal_p (operands[0], operands[1]))
17787 emit_move_insn (operands[0], operands[1]);
17788 emit_insn ((mode == DImode
17789 ? gen_x86_shld
17790 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17791 ix86_expand_ashl_const (low[0], count, mode);
17796 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17798 if (operands[1] == const1_rtx)
17800 /* Assuming we've chosen QImode-capable registers, then 1 << N
17801 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17802 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17804 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17806 ix86_expand_clear (low[0]);
17807 ix86_expand_clear (high[0]);
17808 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17810 d = gen_lowpart (QImode, low[0]);
17811 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17812 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17813 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17815 d = gen_lowpart (QImode, high[0]);
17816 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17817 s = gen_rtx_NE (QImode, flags, const0_rtx);
17818 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17821 /* Otherwise, we can get the same results by manually performing
17822 a bit extract operation on bit 5/6, and then performing the two
17823 shifts. The two methods of getting 0/1 into low/high are exactly
17824 the same size. Avoiding the shift in the bit extract case helps
17825 pentium4 a bit; no one else seems to care much either way. */
17830 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17831 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17833 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17834 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17836 emit_insn ((mode == DImode
17837 ? gen_lshrsi3
17838 : gen_lshrdi3) (high[0], high[0],
17839 GEN_INT (mode == DImode ? 5 : 6)));
17840 emit_insn ((mode == DImode
17841 ? gen_andsi3
17842 : gen_anddi3) (high[0], high[0], const1_rtx));
17843 emit_move_insn (low[0], high[0]);
17844 emit_insn ((mode == DImode
17845 ? gen_xorsi3
17846 : gen_xordi3) (low[0], low[0], const1_rtx));
17849 emit_insn ((mode == DImode
17850 ? gen_ashlsi3
17851 : gen_ashldi3) (low[0], low[0], operands[2]));
17852 emit_insn ((mode == DImode
17853 ? gen_ashlsi3
17854 : gen_ashldi3) (high[0], high[0], operands[2]));
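/* [Editor's sketch, not part of GCC] Branch-free 1 << n for a
   double-word result made of two single words.  Bit 5 (or 6) of the
   count picks the half, and the hardware shifter already reduces the
   count mod the word size:

     static void one_shl_2word (unsigned n, unsigned *lo, unsigned *hi)
     {
       *lo = (n & 32) ? 0u : 1u;   // the setcc/bit-extract step
       *hi = (n & 32) ? 1u : 0u;
       *lo <<= (n & 31);           // hardware masks the count anyway
       *hi <<= (n & 31);
     }  */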
  if (operands[1] == constm1_rtx)
    {
      /* For -1 << N, we can avoid the shld instruction, because we
	 know that we're shifting 0...31/63 ones into a -1.  */
      emit_move_insn (low[0], constm1_rtx);
      if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
    }

  emit_insn ((mode == DImode
	      ? gen_ashlsi3
	      : gen_ashldi3) (low[0], low[0], operands[2]));

  if (TARGET_CMOVE && scratch)
    {
      ix86_expand_clear (scratch);
      emit_insn ((mode == DImode
		  ? gen_x86_shiftsi_adj_1
		  : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
					    scratch));
    }
  else
    emit_insn ((mode == DImode
		? gen_x86_shiftsi_adj_2
		: gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
}

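/* Illustrative sketch of what the splitter above produces, written as
   plain C for the DImode-on-32-bit case (single_width == 32).  This only
   models the generated code; it is not code the compiler runs.  For a
   shift count n with 1 <= n <= 63:

     uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
     if (n >= 32)
       { hi = lo << (n - 32); lo = 0; }
     else
       { hi = (hi << n) | (lo >> (32 - n)); lo <<= n; }

   The else branch is exactly one shld plus one shl.  For a variable
   count the hardware masks the count to 0..31, so the x86_shift*_adj
   patterns (or the cmove sequence under TARGET_CMOVE) swap the halves
   afterwards when bit 5 of the count is set.  */
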
void
ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count == single_width * 2 - 1)
	{
	  emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  emit_move_insn (low[0], high[0]);
	}
      else if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));

      if (TARGET_CMOVE && scratch)
	{
	  emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
	  emit_insn ((mode == DImode
		      ? gen_x86_shiftsi_adj_1
		      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
						scratch));
	}
      else
	emit_insn ((mode == DImode
		    ? gen_x86_shiftsi_adj_3
		    : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
    }
}

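/* The arithmetic variant above differs only in how the vacated high half
   is filled.  As a model (illustration only, 32-bit halves):

     if (n == 63)
       lo = hi = (int32_t) hi >> 31;	... sign fills both halves
     else if (n >= 32)
       { lo = (int32_t) hi >> (n - 32); hi = (int32_t) hi >> 31; }
     else
       { lo = (lo >> n) | (hi << (32 - n));	... one shrd
	 hi = (int32_t) hi >> n; }		... one sar

   The n == 63 special case lets both output halves share a single
   sar-by-31 of the original high word.  */
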
void
ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
{
  rtx low[2], high[2];
  int count;
  const int single_width = mode == DImode ? 32 : 64;

  if (CONST_INT_P (operands[2]))
    {
      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
      count = INTVAL (operands[2]) & (single_width * 2 - 1);

      if (count >= single_width)
	{
	  emit_move_insn (low[0], high[1]);
	  ix86_expand_clear (high[0]);

	  if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
	}
      else
	{
	  if (!rtx_equal_p (operands[0], operands[1]))
	    emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
	}
    }
  else
    {
      if (!rtx_equal_p (operands[0], operands[1]))
	emit_move_insn (operands[0], operands[1]);

      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);

      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));

      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
      if (TARGET_CMOVE && scratch)
	{
	  ix86_expand_clear (scratch);
	  emit_insn ((mode == DImode
		      ? gen_x86_shiftsi_adj_1
		      : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
						scratch));
	}
      else
	emit_insn ((mode == DImode
		    ? gen_x86_shiftsi_adj_2
		    : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
    }
}

/* Predict the just emitted jump instruction to be taken with
   probability PROB.  */
static void
predict_jump (int prob)
{
  rtx insn = get_last_insn ();
  gcc_assert (JUMP_P (insn));
  add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
}

/* Helper function for the string operations below.  Test whether VARIABLE
   is aligned to VALUE bytes, i.e. whether VARIABLE & VALUE is zero (VALUE
   being a power of two); if so, jump to the label that is returned.  */
static rtx
ix86_expand_aligntest (rtx variable, int value, bool epilogue)
{
  rtx label = gen_label_rtx ();
  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
  if (GET_MODE (variable) == DImode)
    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
  else
    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}

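/* For example, ix86_expand_aligntest (count, 2, true) emits roughly

     test $2, %count_reg
     jz   .Lskip

   after combine; the caller then emits the 2-byte copy or store and
   places the returned label (.Lskip here) right behind it, so the fixup
   runs only when that bit of COUNT is set.  */
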
/* Decrease COUNTER by VALUE.  */
static void
ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
{
  if (GET_MODE (countreg) == DImode)
    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
  else
    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
}

/* Zero extend possibly SImode EXP to Pmode register.  */
rtx
ix86_zero_extend_to_Pmode (rtx exp)
{
  rtx r;
  if (GET_MODE (exp) == VOIDmode)
    return force_reg (Pmode, exp);
  if (GET_MODE (exp) == Pmode)
    return copy_to_mode_reg (Pmode, exp);
  r = gen_reg_rtx (Pmode);
  emit_insn (gen_zero_extendsidi2 (r, exp));
  return r;
}

/* Divide COUNTREG by SCALE.  */
static rtx
scale_counter (rtx countreg, int scale)
{
  rtx sc;

  if (scale == 1)
    return countreg;
  if (CONST_INT_P (countreg))
    return GEN_INT (INTVAL (countreg) / scale);
  gcc_assert (REG_P (countreg));

  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
			    GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}

/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
   DImode for constant loop counts.  */
static enum machine_mode
counter_mode (rtx count_exp)
{
  if (GET_MODE (count_exp) != VOIDmode)
    return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    abort ();
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}

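/* E.g. a constant count of 100 yields SImode even on 64-bit targets,
   where the shorter 32-bit operations suffice, while a constant that
   does not fit in 32 bits, or a count already held in a DImode register,
   yields DImode.  */
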
/* When SRCPTR is non-NULL, output a simple loop that moves memory from
   SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, with an
   overall size of COUNT bytes.  When SRCPTR is NULL, output the
   equivalent loop to set memory to VALUE (supposed to be in MODE).

   The size is rounded down to a whole number of the chunk size moved at
   once.  SRCMEM and DESTMEM provide MEM rtxen to feed proper aliasing
   info.  */

static void
expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
			       rtx destptr, rtx srcptr, rtx value,
			       rtx count, enum machine_mode mode, int unroll,
			       int expected_size)
{
  rtx out_label, top_label, iter, tmp;
  enum machine_mode iter_mode = counter_mode (count);
  rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
  rtx size;
  rtx x_addr;
  rtx y_addr;
  int i;

  top_label = gen_label_rtx ();
  out_label = gen_label_rtx ();
  iter = gen_reg_rtx (iter_mode);

  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
			      NULL, 1, OPTAB_DIRECT);
  /* Those two should combine.  */
  if (piece_size == const1_rtx)
    {
      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
			       true, out_label);
      predict_jump (REG_BR_PROB_BASE * 10 / 100);
    }
  emit_move_insn (iter, const0_rtx);

  emit_label (top_label);

  tmp = convert_modes (Pmode, iter_mode, iter, true);
  x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
  destmem = change_address (destmem, mode, x_addr);

  if (srcmem)
    {
      y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
      srcmem = change_address (srcmem, mode, y_addr);

      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.  Also, using
	 4 temporaries is overkill in 32bit mode.  (Note that this
	 variant is currently disabled by the "&& 0" below.)  */
      if (!TARGET_64BIT && 0)
	{
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		{
		  destmem =
		    adjust_address (copy_rtx (destmem), mode,
				    GET_MODE_SIZE (mode));
		  srcmem =
		    adjust_address (copy_rtx (srcmem), mode,
				    GET_MODE_SIZE (mode));
		}
	      emit_move_insn (destmem, srcmem);
	    }
	}
      else
	{
	  rtx tmpreg[4];
	  gcc_assert (unroll <= 4);
	  for (i = 0; i < unroll; i++)
	    {
	      tmpreg[i] = gen_reg_rtx (mode);
	      if (i)
		srcmem =
		  adjust_address (copy_rtx (srcmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (tmpreg[i], srcmem);
	    }
	  for (i = 0; i < unroll; i++)
	    {
	      if (i)
		destmem =
		  adjust_address (copy_rtx (destmem), mode,
				  GET_MODE_SIZE (mode));
	      emit_move_insn (destmem, tmpreg[i]);
	    }
	}
    }
  else
    for (i = 0; i < unroll; i++)
      {
	if (i)
	  destmem =
	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
	emit_move_insn (destmem, value);
      }

  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != iter)
    emit_move_insn (iter, tmp);

  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
			   true, top_label);
  if (expected_size != -1)
    {
      expected_size /= GET_MODE_SIZE (mode) * unroll;
      if (expected_size == 0)
	predict_jump (0);
      else if (expected_size > REG_BR_PROB_BASE)
	predict_jump (REG_BR_PROB_BASE - 1);
      else
	predict_jump (REG_BR_PROB_BASE
		      - (REG_BR_PROB_BASE + expected_size / 2)
			/ expected_size);
    }
  else
    predict_jump (REG_BR_PROB_BASE * 80 / 100);
  iter = ix86_zero_extend_to_Pmode (iter);
  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
			     true, OPTAB_LIB_WIDEN);
  if (tmp != destptr)
    emit_move_insn (destptr, tmp);
  if (srcptr)
    {
      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
				 true, OPTAB_LIB_WIDEN);
      if (tmp != srcptr)
	emit_move_insn (srcptr, tmp);
    }
  emit_label (out_label);
}

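/* Illustrative sketch (a model of the emitted RTL, not code the compiler
   runs): with MODE = SImode and UNROLL = 4 the loop built above behaves
   like

     size = count & ~15;                      (piece_size_mask)
     for (iter = 0; iter < size; iter += 16)
       {
	 t0 = src[iter];      t1 = src[iter + 4];
	 t2 = src[iter + 8];  t3 = src[iter + 12];
	 dst[iter] = t0;      dst[iter + 4] = t1;
	 dst[iter + 8] = t2;  dst[iter + 12] = t3;
       }
     dst += iter;  src += iter;

   with all loads emitted before all stores, matching the tmpreg[] code
   above.  For the memset variant (SRCMEM == NULL) the stores use the
   promoted VALUE instead of the temporaries.  */
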
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
			   rtx destptr, rtx srcptr,
			   rtx count,
			   enum machine_mode mode)
{
  rtx destexp;
  rtx srcexp;
  rtx countreg;

  /* If the size is known and a multiple of 4, it is shorter to use
     dword-sized rep movs.  */
  if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
    srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
      srcexp = gen_rtx_ASHIFT (Pmode, countreg,
			       GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
    }
  else
    {
      destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
      srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
    }
  if (CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      srcmem = shallow_copy_rtx (srcmem);
      set_mem_size (destmem, count);
      set_mem_size (srcmem, count);
    }
  else
    {
      if (MEM_SIZE (destmem))
	set_mem_size (destmem, NULL_RTX);
      if (MEM_SIZE (srcmem))
	set_mem_size (srcmem, NULL_RTX);
    }
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}

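/* The rep_mov pattern models the x86 semantics of "rep movs{b,l,q}":
   ecx/rcx holds the scaled count (COUNTREG), and after the instruction
   edi/rdi and esi/rsi have advanced by the full byte count, which is
   what the DESTEXP and SRCEXP expressions built above describe to the
   RTL level; e.g. for SImode, destexp is destptr + (countreg << 2).  */
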
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
static void
expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
			    rtx count, enum machine_mode mode,
			    rtx orig_value)
{
  rtx destexp;
  rtx countreg;

  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
  value = force_reg (mode, gen_lowpart (mode, value));
  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
						       GET_MODE_SIZE (mode)));
  if (mode != QImode)
    {
      destexp = gen_rtx_ASHIFT (Pmode, countreg,
				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
    }
  else
    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
  if (orig_value == const0_rtx && CONST_INT_P (count))
    {
      count = GEN_INT (INTVAL (count)
		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
      destmem = shallow_copy_rtx (destmem);
      set_mem_size (destmem, count);
    }
  else if (MEM_SIZE (destmem))
    set_mem_size (destmem, NULL_RTX);
  emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
}

/* Helper for expand_movmem_epilogue: emit one MODE-sized move from
   SRCMEM+OFFSET to DESTMEM+OFFSET via the strmov pattern.  */
static void
emit_strmov (rtx destmem, rtx srcmem,
	     rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
{
  rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
  rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
  emit_insn (gen_strmov (destptr, dest, srcptr, src));
}

/* Output code to copy at most count & (max_size - 1) bytes from SRC to
   DEST.  */
static void
expand_movmem_epilogue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count, int max_size)
{
  rtx src, dest;
  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, DImode,
			   offset + 8);
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
	  else
	    {
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	      emit_strmov (destmem, srcmem, destptr, srcptr, SImode,
			   offset + 4);
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
	  offset += 1;
	}
      return;
    }
  if (max_size > 8)
    {
      count = expand_simple_binop (GET_MODE (count), AND, count,
				   GEN_INT (max_size - 1),
				   count, 1, OPTAB_DIRECT);
      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
				     count, QImode, 1, 4);
      return;
    }

  /* When there are stringops, we can cheaply increase dest and src
     pointers.  Otherwise we save code size by maintaining an offset
     (zero is readily available from the preceding rep operation) and
     using x86 addressing modes.  */
  if (TARGET_SINGLE_STRINGOP)
    {
      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  src = change_address (srcmem, HImode, srcptr);
	  dest = change_address (destmem, HImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  src = change_address (srcmem, QImode, srcptr);
	  dest = change_address (destmem, QImode, destptr);
	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
  else
    {
      rtx offset = force_reg (Pmode, const0_rtx);
      rtx tmp;

      if (max_size > 4)
	{
	  rtx label = ix86_expand_aligntest (count, 4, true);
	  src = change_address (srcmem, SImode, srcptr);
	  dest = change_address (destmem, SImode, destptr);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 2)
	{
	  rtx label = ix86_expand_aligntest (count, 2, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, HImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, HImode, tmp);
	  emit_move_insn (dest, src);
	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
				     true, OPTAB_LIB_WIDEN);
	  if (tmp != offset)
	    emit_move_insn (offset, tmp);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
      if (max_size > 1)
	{
	  rtx label = ix86_expand_aligntest (count, 1, true);
	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
	  src = change_address (srcmem, QImode, tmp);
	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
	  dest = change_address (destmem, QImode, tmp);
	  emit_move_insn (dest, src);
	  emit_label (label);
	  LABEL_NUSES (label) = 1;
	}
    }
}

/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST, using a byte loop.  */
static void
expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
				 rtx count, int max_size)
{
  count =
    expand_simple_binop (counter_mode (count), AND, count,
			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
				 gen_lowpart (QImode, value), count, QImode,
				 1, max_size / 2);
}

/* Output code to set at most count & (max_size - 1) bytes starting at
   DEST.  */
static void
expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count,
			int max_size)
{
  rtx dest;

  if (CONST_INT_P (count))
    {
      HOST_WIDE_INT countval = INTVAL (count);
      int offset = 0;

      if ((countval & 0x10) && max_size > 16)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset + 8);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    gcc_unreachable ();
	  offset += 16;
	}
      if ((countval & 0x08) && max_size > 8)
	{
	  if (TARGET_64BIT)
	    {
	      dest = adjust_automodify_address_nv (destmem, DImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  else
	    {
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset);
	      emit_insn (gen_strset (destptr, dest, value));
	      dest = adjust_automodify_address_nv (destmem, SImode, destptr,
						   offset + 4);
	      emit_insn (gen_strset (destptr, dest, value));
	    }
	  offset += 8;
	}
      if ((countval & 0x04) && max_size > 4)
	{
	  dest = adjust_automodify_address_nv (destmem, SImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
	  offset += 4;
	}
      if ((countval & 0x02) && max_size > 2)
	{
	  dest = adjust_automodify_address_nv (destmem, HImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
	  offset += 2;
	}
      if ((countval & 0x01) && max_size > 1)
	{
	  dest = adjust_automodify_address_nv (destmem, QImode, destptr,
					       offset);
	  emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
	  offset += 1;
	}
      return;
    }
  if (max_size > 32)
    {
      expand_setmem_epilogue_via_loop (destmem, destptr, value, count,
				       max_size);
      return;
    }
  if (max_size > 16)
    {
      rtx label = ix86_expand_aligntest (count, 16, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 8)
    {
      rtx label = ix86_expand_aligntest (count, 8, true);
      if (TARGET_64BIT)
	{
	  dest = change_address (destmem, DImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	}
      else
	{
	  dest = change_address (destmem, SImode, destptr);
	  emit_insn (gen_strset (destptr, dest, value));
	  emit_insn (gen_strset (destptr, dest, value));
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 4)
    {
      rtx label = ix86_expand_aligntest (count, 4, true);
      dest = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 2)
    {
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}

/* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
   to ALIGN bytes, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
			rtx destptr, rtx srcptr, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Copy enough bytes from *SRCP to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
				 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src,
					      desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  src_size = MEM_SIZE (src);
  dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
	  && (src_align_bytes & 1) == (align_bytes & 1)
	  && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
	set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
	{
	  unsigned int src_align = 0;
	  if ((src_align_bytes & 3) == (align_bytes & 3))
	    src_align = 4;
	  else if ((src_align_bytes & 1) == (align_bytes & 1))
	    src_align = 2;
	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	    set_mem_align (src, src_align * BITS_PER_UNIT);
	}
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
	src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
	src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
	src_align = 2;
      if (src_align > (unsigned int) desired_align)
	src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
	set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}

/* Store enough bytes at DEST to align it, known to be aligned to ALIGN
   bytes, to DESIRED_ALIGNMENT.  */
static void
expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
			int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Store enough bytes of VALUE at DST to align DST, known to be aligned
   to ALIGN bytes, to DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need
   to be stored.  */
static rtx
expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
				 int desired_align, int align_bytes)
{
  int off = 0;
  rtx dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      off = 1;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (QImode, value)));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
	set_mem_align (dst, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (HImode, value)));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
	set_mem_align (dst, 4 * BITS_PER_UNIT);
      off = 4;
      emit_insn (gen_strset (destreg, dst,
			     gen_lowpart (SImode, value)));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
}

/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
static enum stringop_alg
decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
	    int *dynamic_check)
{
  const struct stringop_algs * algs;
  bool optimize_for_speed;
  /* Algorithms using the rep prefix want at least edi and ecx;
     additionally, memset wants eax and memcpy wants esi.  Don't
     consider such algorithms if the user has appropriated those
     registers for their own purposes.  */
  bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
			     || (memset
				 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable			\
			   || (alg != rep_prefix_1_byte		\
			       && alg != rep_prefix_4_byte	\
			       && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
	  && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
	return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
	return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  /* We get here if the algorithms that were not libcall-based
	     were rep-prefix based and we are unable to use rep prefixes
	     based on global register usage.  Break out of the loop and
	     use the heuristic below.  */
	  if (algs->size[i].max == 0)
	    break;
	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
	    {
	      enum stringop_alg candidate = algs->size[i].alg;

	      if (candidate != libcall && ALG_USABLE_P (candidate))
		alg = candidate;
	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
		 the last non-libcall inline algorithm.  */
	      if (TARGET_INLINE_ALL_STRINGOPS)
		{
		  /* When the current size is best to be copied by a libcall,
		     but we are still forced to inline, run the heuristic below
		     that will pick code for medium sized blocks.  */
		  if (alg != libcall)
		    return alg;
		  break;
		}
	      else if (ALG_USABLE_P (candidate))
		return candidate;
	    }
	}
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall
	  || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
	{
	  enum stringop_alg candidate = algs->size[i].alg;
	  any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

	  if (candidate != libcall && candidate
	      && ALG_USABLE_P (candidate))
	    max = algs->size[i].max;
	}
      /* If there aren't any usable algorithms, then recursing on
	 smaller sizes isn't going to find anything.  Just return the
	 simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
	{
	  /* Pick something reasonable.  */
	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	    *dynamic_check = 128;
	  return loop_1_byte;
	}
      if (max == -1)
	max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
	*dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}

/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
		  enum stringop_alg alg,
		  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying the whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8 byte aligned blocks,
	 copying the whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
	desired_align = 8;
      else
	desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}

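/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8; the epilogue size computed from
   this below is therefore always a power of two strictly greater than
   its argument.  */
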
/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_setmem contains similar code.  The code depends upon
   architecture, block size and alignment, but always has the same
   overall structure:

   1) Prologue guard: Conditional that jumps up to epilogues for small
      blocks that can be handled by epilogue alone.  This is faster but
      also needed for correctness, since the prologue assumes the block
      is larger than the desired alignment.

      An optional dynamic check for size and a libcall for large
      blocks is emitted here too, with -minline-stringops-dynamically.

   2) Prologue: copy first few bytes in order to get destination aligned
      to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
      We emit either a jump tree on power of two sized blocks, or a byte
      loop.

   3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
      with the specified algorithm.

   4) Epilogue: code copying the tail of the block that is too small to be
      handled by the main body (or up to size guarded by the prologue
      guard).  */

int
ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx srcreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  /* ALIGN is the minimum of destination and source alignment, but we care
     here just about destination alignment.  */
  else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
    align = MEM_ALIGN (dst) / BITS_PER_UNIT;

  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, false, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }

  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	count_exp = force_reg (counter_mode (count_exp), count_exp);
    }
  gcc_assert (desired_align >= 1 && align >= 1);

  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size < epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }

  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
  if (dynamic_check != -1)
    {
      if (CONST_INT_P (count_exp))
	{
	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT) dynamic_check)
	    {
	      emit_block_move_via_libcall (dst, src, count_exp, false);
	      count_exp = const0_rtx;
	      goto epilogue;
	    }
	}
      else
	{
	  rtx hot_label = gen_label_rtx ();
	  jump_around_label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
				   LEU, 0, GET_MODE (count_exp), 1, hot_label);
	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
	  emit_block_move_via_libcall (dst, src, count_exp, false);
	  emit_jump (jump_around_label);
	  emit_label (hot_label);
	}
    }

  /* Step 2: Alignment prologue.  */

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  src = change_address (src, BLKmode, srcreg);
	  dst = change_address (dst, BLKmode, destreg);
	  expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
	 registers for 4 temporaries anyway.  */
      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
				     count_exp, Pmode, TARGET_64BIT ? 4 : 2,
				     expected_size);
      break;
    case rep_prefix_8_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 DImode);
      break;
    case rep_prefix_4_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 SImode);
      break;
    case rep_prefix_1_byte:
      expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
				 QImode);
      break;
    }
  /* Properly adjust the offsets of SRC and DEST memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    {
      src = adjust_automodify_address_nv (src, BLKmode, srcreg,
					  (count / size_needed) * size_needed);
      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					  (count / size_needed) * size_needed);
    }
  else
    {
      src = change_address (src, BLKmode, srcreg);
      dst = change_address (dst, BLKmode, destreg);
    }

  /* Step 4: Epilogue to copy the remaining bytes.  */
 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
			    epilogue_size_needed);
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}

/* Helper function for memset (ix86_expand_setmem).  For a QImode value
   0xXY produce 0xXYXYXYXY of the width specified by MODE.  This is
   essentially a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
static rtx
promote_duplicated_reg (enum machine_mode mode, rtx val)
{
  enum machine_mode valmode = GET_MODE (val);
  rtx tmp;
  int nops = mode == DImode ? 3 : 2;

  gcc_assert (mode == SImode || mode == DImode);
  if (val == const0_rtx)
    return copy_to_mode_reg (mode, const0_rtx);
  if (CONST_INT_P (val))
    {
      HOST_WIDE_INT v = INTVAL (val) & 255;

      v |= v << 8;
      v |= v << 16;
      if (mode == DImode)
	v |= (v << 16) << 16;
      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
    }

  if (valmode == VOIDmode)
    valmode = QImode;
  if (valmode != QImode)
    val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
      <= (ix86_cost->shift_const + ix86_cost->add) * nops
	  + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
    {
      rtx reg = convert_modes (mode, QImode, val, true);
      tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
				  OPTAB_DIRECT);
    }
  else
    {
      rtx reg = convert_modes (mode, QImode, val, true);

      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1,
				     OPTAB_DIRECT);
	}
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
	return reg;
      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
				 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
    }
}

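/* Worked example (illustration only): promoting the QImode value 0xAB to
   SImode by the shift/IOR path above proceeds as

     reg = 0x000000AB
     reg |= reg << 8		yielding 0x0000ABAB (or the movsi_insv_1
				insert when partial register stalls are
				not a problem)
     reg |= reg << 16		yielding 0xABABABAB

   which matches 0xAB * 0x01010101; for DImode one more shift-by-32/IOR
   step is emitted.  */
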
/* Duplicate value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by
   the prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
static rtx
promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
				int align)
{
  rtx promoted_val;

  if (TARGET_64BIT
      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
    promoted_val = promote_duplicated_reg (DImode, val);
  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
    promoted_val = promote_duplicated_reg (SImode, val);
  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
    promoted_val = promote_duplicated_reg (HImode, val);
  else
    promoted_val = val;

  return promoted_val;
}

/* Expand string clear operation (bzero/memset).  Use i386 string
   operations when profitable.  See the expand_movmem comment for an
   explanation of the individual steps performed.  */
int
ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
		    rtx expected_align_exp, rtx expected_size_exp)
{
  rtx destreg;
  rtx label = NULL;
  rtx tmp;
  rtx jump_around_label = NULL;
  HOST_WIDE_INT align = 1;
  unsigned HOST_WIDE_INT count = 0;
  HOST_WIDE_INT expected_size = -1;
  int size_needed = 0, epilogue_size_needed;
  int desired_align = 0, align_bytes = 0;
  enum stringop_alg alg;
  rtx promoted_val = NULL;
  bool force_loopy_epilogue = false;
  int dynamic_check;
  bool need_zero_guard = false;

  if (CONST_INT_P (align_exp))
    align = INTVAL (align_exp);
  /* i386 can do misaligned access at reasonably increased cost.  */
  if (CONST_INT_P (expected_align_exp)
      && INTVAL (expected_align_exp) > align)
    align = INTVAL (expected_align_exp);
  if (CONST_INT_P (count_exp))
    count = expected_size = INTVAL (count_exp);
  if (CONST_INT_P (expected_size_exp) && count == 0)
    expected_size = INTVAL (expected_size_exp);

  /* Make sure we don't need to care about overflow later on.  */
  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
    return 0;

  /* Step 0: Decide on preferred algorithm, desired alignment and
     size of chunks to be copied by main loop.  */

  alg = decide_alg (count, expected_size, true, &dynamic_check);
  desired_align = decide_alignment (align, alg, expected_size);

  if (!TARGET_ALIGN_STRINGOPS)
    align = desired_align;

  if (alg == libcall)
    return 0;
  gcc_assert (alg != no_stringop);
  if (!count)
    count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode);
      break;
    case unrolled_loop:
      need_zero_guard = true;
      size_needed = GET_MODE_SIZE (Pmode) * 4;
      break;
    case rep_prefix_8_byte:
      size_needed = 8;
      break;
    case rep_prefix_4_byte:
      size_needed = 4;
      break;
    case rep_prefix_1_byte:
      size_needed = 1;
      break;
    case loop_1_byte:
      need_zero_guard = true;
      size_needed = 1;
      break;
    }
  epilogue_size_needed = size_needed;

  /* Step 1: Prologue guard.  */

  /* Alignment code needs count to be in register.  */
  if (CONST_INT_P (count_exp) && desired_align > align)
    {
      if (INTVAL (count_exp) > desired_align
	  && INTVAL (count_exp) > size_needed)
	{
	  align_bytes
	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
	  if (align_bytes <= 0)
	    align_bytes = 0;
	  else
	    align_bytes = desired_align - align_bytes;
	}
      if (align_bytes == 0)
	{
	  enum machine_mode mode = SImode;
	  if (TARGET_64BIT && (count & ~0xffffffff))
	    mode = DImode;
	  count_exp = force_reg (mode, count_exp);
	}
    }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
  if (CONST_INT_P (val_exp))
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  /* Ensure that alignment prologue won't copy past end of block.  */
  if (size_needed > 1 || (desired_align > 1 && desired_align > align))
    {
      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
	 Make sure it is power of 2.  */
      epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);

      /* To improve performance of small blocks, we jump around the VAL
	 promoting mode.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
      if (epilogue_size_needed > 2 && !promoted_val)
	force_loopy_epilogue = true;
      if (count)
	{
	  if (count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
	    {
	      /* If main algorithm works on QImode, no epilogue is needed.
		 For small sizes just don't align anything.  */
	      if (size_needed == 1)
		desired_align = align;
	      else
		goto epilogue;
	    }
	}
      else
	{
	  label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (epilogue_size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1 || expected_size <= epilogue_size_needed)
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	}
    }
  if (dynamic_check != -1)
    {
      rtx hot_label = gen_label_rtx ();
      jump_around_label = gen_label_rtx ();
      emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
			       LEU, 0, counter_mode (count_exp), 1, hot_label);
      predict_jump (REG_BR_PROB_BASE * 90 / 100);
      set_storage_via_libcall (dst, count_exp, val_exp, false);
      emit_jump (jump_around_label);
      emit_label (hot_label);
    }

  /* Step 2: Alignment prologue.  */

  /* Do the expensive promotion once we branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
						   desired_align, align);
  gcc_assert (desired_align >= 1 && align >= 1);

  if (desired_align > align)
    {
      if (align_bytes == 0)
	{
	  /* Except for the first move in epilogue, we no longer know
	     constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
	  dst = change_address (dst, BLKmode, destreg);
	  expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
				  desired_align);
	}
      else
	{
	  /* If we know how many bytes need to be stored before dst is
	     sufficiently aligned, maintain aliasing info accurately.  */
	  dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
						 desired_align, align_bytes);
	  count_exp = plus_constant (count_exp, -align_bytes);
	  count -= align_bytes;
	}
      if (need_zero_guard
	  && (count < (unsigned HOST_WIDE_INT) size_needed
	      || (align_bytes == 0
		  && count < ((unsigned HOST_WIDE_INT) size_needed
			      + desired_align - align))))
	{
	  /* It is possible that we copied enough so the main loop will not
	     execute.  */
	  gcc_assert (size_needed > 1);
	  if (label == NULL_RTX)
	    label = gen_label_rtx ();
	  emit_cmp_and_jump_insns (count_exp,
				   GEN_INT (size_needed),
				   LTU, 0, counter_mode (count_exp), 1, label);
	  if (expected_size == -1
	      || expected_size < (desired_align - align) / 2 + size_needed)
	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
	  else
	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
	}
    }
  if (label && size_needed == 1)
    {
      emit_label (label);
      LABEL_NUSES (label) = 1;
      label = NULL;
      promoted_val = val_exp;
      epilogue_size_needed = 1;
    }
  else if (label == NULL_RTX)
    epilogue_size_needed = size_needed;

  /* Step 3: Main loop.  */

  switch (alg)
    {
    case libcall:
    case no_stringop:
      gcc_unreachable ();
    case loop_1_byte:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, QImode, 1, expected_size);
      break;
    case loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 1, expected_size);
      break;
    case unrolled_loop:
      expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
				     count_exp, Pmode, 4, expected_size);
      break;
    case rep_prefix_8_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  DImode, val_exp);
      break;
    case rep_prefix_4_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  SImode, val_exp);
      break;
    case rep_prefix_1_byte:
      expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
				  QImode, val_exp);
      break;
    }
  /* Properly adjust the offset of the destination memory for aliasing.  */
  if (CONST_INT_P (count_exp))
    dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
					(count / size_needed) * size_needed);
  else
    dst = change_address (dst, BLKmode, destreg);

  /* Step 4: Epilogue to copy the remaining bytes.  */

 epilogue:
  if (label)
    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
	 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
	 bytes.  Compensate if needed.  */

      if (size_needed < epilogue_size_needed)
	{
	  tmp =
	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
				 GEN_INT (size_needed - 1), count_exp, 1,
				 OPTAB_DIRECT);
	  if (tmp != count_exp)
	    emit_move_insn (count_exp, tmp);
	}
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }

  if (count_exp != const0_rtx && epilogue_size_needed > 1)
    {
      if (force_loopy_epilogue)
	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
					 epilogue_size_needed);
      else
	expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
				epilogue_size_needed);
    }
  if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
}

/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb

   out = result, initialized with the start address
   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined

   This is just the body.  It needs the initializations mentioned above and
   some address computing at the end.  These things are done in i386.md.  */

19756 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19760 rtx align_2_label = NULL_RTX;
19761 rtx align_3_label = NULL_RTX;
19762 rtx align_4_label = gen_label_rtx ();
19763 rtx end_0_label = gen_label_rtx ();
19765 rtx tmpreg = gen_reg_rtx (SImode);
19766 rtx scratch = gen_reg_rtx (SImode);
19770 if (CONST_INT_P (align_rtx))
19771 align = INTVAL (align_rtx);
19773 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19775 /* Is there a known alignment and is it less than 4? */
19778 rtx scratch1 = gen_reg_rtx (Pmode);
19779 emit_move_insn (scratch1, out);
19780 /* Is there a known alignment and is it not 2? */
19783 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19784 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19786 /* Leave just the 3 lower bits. */
19787 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19788 NULL_RTX, 0, OPTAB_WIDEN);
19790 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19791 Pmode, 1, align_4_label);
19792 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19793 Pmode, 1, align_2_label);
19794 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19795 Pmode, 1, align_3_label);
19799 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19800 check if is aligned to 4 - byte. */
19802 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19803 NULL_RTX, 0, OPTAB_WIDEN);
19805 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19806 Pmode, 1, align_4_label);
19809 mem = change_address (src, QImode, out);
19811 /* Now compare the bytes. */
19813 /* Compare the first n unaligned byte on a byte per byte basis. */
19814 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19815 QImode, 1, end_0_label);
19817 /* Increment the address. */
19818 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19820 /* Not needed with an alignment of 2 */
19823 emit_label (align_2_label);
19825 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19828 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19830 emit_label (align_3_label);
19833 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19836 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19839 /* Generate loop to check 4 bytes at a time. It is not a good idea to
19840 align this loop. It gives only huge programs, but does not help to
19842 emit_label (align_4_label);
19844 mem = change_address (src, SImode, out);
19845 emit_move_insn (scratch, mem);
19846 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
19848 /* This formula yields a nonzero result iff one of the bytes is zero.
19849 This saves three branches inside loop and many cycles. */
19851 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19852 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19853 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19854 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19855 gen_int_mode (0x80808080, SImode)));
19856 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19857 align_4_label);
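/* Editorial sketch: the zero-byte test above, expressed in plain C.
   This is an illustration only (not part of the compiler); the helper
   name is made up and 32-bit words are assumed.  Subtracting
   0x01010101 can carry a borrow into a byte's top bit only when a
   zero byte is involved, and masking with ~X discards bytes whose own
   top bit was already set, so the result is nonzero exactly when some
   byte of X is zero.  */
#if 0
#include <stdint.h>

static int
has_zero_byte (uint32_t x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}
#endif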
19861 rtx reg = gen_reg_rtx (SImode);
19862 rtx reg2 = gen_reg_rtx (Pmode);
19863 emit_move_insn (reg, tmpreg);
19864 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19866 /* If zero is not in the first two bytes, move two bytes forward. */
19867 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19868 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19869 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19870 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19871 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19874 /* Emit lea manually to avoid clobbering of flags. */
19875 emit_insn (gen_rtx_SET (SImode, reg2,
19876 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19878 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19879 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19880 emit_insn (gen_rtx_SET (VOIDmode, out,
19881 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19887 rtx end_2_label = gen_label_rtx ();
19888 /* Is zero in the first two bytes? */
19890 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19891 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19892 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19893 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19894 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19896 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19897 JUMP_LABEL (tmp) = end_2_label;
19899 /* Not in the first two. Move two bytes forward. */
19900 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19901 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
19903 emit_label (end_2_label);
19907 /* Avoid branch in fixing the byte. */
19908 tmpreg = gen_lowpart (QImode, tmpreg);
19909 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19910 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19911 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19912 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
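/* At this point the QImode add above has copied the top bit of the
   remaining byte mask into the carry flag, so the sbb emitted by
   ix86_gen_sub3_carry subtracts 4 when the NUL is the first byte of
   the final pair and 3 when it is the second, leaving OUT pointing
   at the terminator without a branch.  */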
19914 emit_label (end_0_label);
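/* Editorial sketch: taken together, the RTL emitted above corresponds
   roughly to the following C, assuming a little-endian 32-bit target
   (illustration only; the function name is made up).  */
#if 0
#include <stdint.h>

static const char *
find_nul_unrolled (const char *p)
{
  /* Probe up to 3 leading bytes until P is 4-byte aligned.  */
  while (((uintptr_t) p & 3) != 0)
    {
      if (*p == 0)
	return p;
      p++;
    }
  /* Scan one 32-bit word per iteration.  */
  for (;;)
    {
      uint32_t w = *(const uint32_t *) p;
      p += 4;
      if ((w - 0x01010101u) & ~w & 0x80808080u)
	break;
    }
  /* Some byte of the last word was zero; locate it.  */
  p -= 4;
  while (*p != 0)
    p++;
  return p;
}
#endif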
19917 /* Expand strlen. */
19919 int
19920 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19922 rtx addr, scratch1, scratch2, scratch3, scratch4;
19924 /* The generic case of the strlen expander is long. Avoid expanding it
19925 unless TARGET_INLINE_ALL_STRINGOPS. */
19927 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19928 && !TARGET_INLINE_ALL_STRINGOPS
19929 && !optimize_insn_for_size_p ()
19930 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19933 addr = force_reg (Pmode, XEXP (src, 0));
19934 scratch1 = gen_reg_rtx (Pmode);
19936 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19937 && !optimize_insn_for_size_p ())
19939 /* It seems that some optimizers do not combine a call like
19940 foo(strlen(bar), strlen(bar));
19941 when the move and the subtraction are done here. They calculate
19942 the length just once when these instructions are done inside
19943 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
19944 and this way one fewer register is live for the lifetime of
19945 output_strlen_unroll(), this is better. */
19947 emit_move_insn (out, addr);
19949 ix86_expand_strlensi_unroll_1 (out, src, align);
19951 /* strlensi_unroll_1 returns the address of the zero at the end of
19952 the string, like memchr(), so compute the length by subtracting
19953 the start address. */
19954 emit_insn (ix86_gen_sub3 (out, out, addr));
19960 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19961 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19964 scratch2 = gen_reg_rtx (Pmode);
19965 scratch3 = gen_reg_rtx (Pmode);
19966 scratch4 = force_reg (Pmode, constm1_rtx);
19968 emit_move_insn (scratch3, addr);
19969 eoschar = force_reg (QImode, eoschar);
19971 src = replace_equiv_address_nv (src, scratch3);
19973 /* If .md starts supporting :P, this can be done in .md. */
19974 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19975 scratch4), UNSPEC_SCAS);
19976 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19977 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
19978 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
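/* The scas loop starts the count register at -1 and decrements it once
   per byte scanned, including the terminator, so it finishes holding
   -(len + 2); the one's complement and add of -1 above therefore
   compute ~count - 1 == len.  */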
19983 /* For a given symbol (function), construct code to compute the address
19984 of its PLT entry in the large x86-64 PIC model. */
19985 static rtx
19986 construct_plt_address (rtx symbol)
19988 rtx tmp = gen_reg_rtx (Pmode);
19989 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19991 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19992 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19994 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19995 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
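/* In other words, this emits roughly
       movabs $symbol@PLTOFF, %tmp
       add    %reg, %tmp
   where %reg is the PIC register holding the GOT base in the large
   model (an illustrative rendering; the exact assembly depends on
   operand printing).  */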
19996 return tmp;
19997 }
19999 void
20000 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20001 rtx callarg2,
20002 rtx pop, int sibcall)
20004 rtx use = NULL, call;
20006 if (pop == const0_rtx)
20007 pop = NULL;
20008 gcc_assert (!TARGET_64BIT || !pop);
20010 if (TARGET_MACHO && !TARGET_64BIT)
20013 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20014 fnaddr = machopic_indirect_call_target (fnaddr);
20019 /* Static functions and indirect calls don't need the pic register. */
20020 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20021 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20022 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20023 use_reg (&use, pic_offset_table_rtx);
20026 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20028 rtx al = gen_rtx_REG (QImode, AX_REG);
20029 emit_move_insn (al, callarg2);
20030 use_reg (&use, al);
20033 if (ix86_cmodel == CM_LARGE_PIC
20034 && MEM_P (fnaddr)
20035 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20036 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20037 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20038 else if (sibcall
20039 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20040 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20042 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20043 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20046 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20048 call = gen_rtx_SET (VOIDmode, retval, call);
20051 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20052 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20053 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20056 && ix86_cfun_abi () == MS_ABI
20057 && (!callarg2 || INTVAL (callarg2) != -2))
20059 /* We need to represent that SI and DI registers are clobbered
20060 by the function call. */
20061 static int clobbered_registers[] = {
20062 XMM6_REG, XMM7_REG, XMM8_REG,
20063 XMM9_REG, XMM10_REG, XMM11_REG,
20064 XMM12_REG, XMM13_REG, XMM14_REG,
20065 XMM15_REG, SI_REG, DI_REG
20068 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20069 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20070 UNSPEC_MS_TO_SYSV_CALL);
20074 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20075 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20076 ? TImode : DImode,
20077 gen_rtx_REG
20078 (SSE_REGNO_P (clobbered_registers[i])
20079 ? TImode : DImode,
20080 clobbered_registers[i]));
20082 call = gen_rtx_PARALLEL (VOIDmode,
20083 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20084 + 2, vec));
20087 call = emit_call_insn (call);
20089 CALL_INSN_FUNCTION_USAGE (call) = use;
20093 /* Clear stack slot assignments remembered from previous functions.
20094 This is called from INIT_EXPANDERS once before RTL is emitted for each
20095 function. */
20097 static struct machine_function *
20098 ix86_init_machine_status (void)
20100 struct machine_function *f;
20102 f = ggc_alloc_cleared_machine_function ();
20103 f->use_fast_prologue_epilogue_nregs = -1;
20104 f->tls_descriptor_call_expanded_p = 0;
20105 f->call_abi = ix86_abi;
20107 return f;
20110 /* Return a MEM corresponding to a stack slot with mode MODE.
20111 Allocate a new slot if necessary.
20113 The RTL for a function can have several slots available: N is
20114 which slot to use. */
20116 rtx
20117 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20119 struct stack_local_entry *s;
20121 gcc_assert (n < MAX_386_STACK_LOCALS);
20123 /* Virtual slot is valid only before vregs are instantiated. */
20124 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20126 for (s = ix86_stack_locals; s; s = s->next)
20127 if (s->mode == mode && s->n == n)
20128 return copy_rtx (s->rtl);
20130 s = ggc_alloc_stack_local_entry ();
20131 s->n = n;
20132 s->mode = mode;
20133 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20135 s->next = ix86_stack_locals;
20136 ix86_stack_locals = s;
20140 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20142 static GTY(()) rtx ix86_tls_symbol;
20143 static rtx
20144 ix86_tls_get_addr (void)
20147 if (!ix86_tls_symbol)
20149 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20150 (TARGET_ANY_GNU_TLS
20151 && !TARGET_64BIT)
20152 ? "___tls_get_addr"
20153 : "__tls_get_addr");
20156 return ix86_tls_symbol;
20159 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20161 static GTY(()) rtx ix86_tls_module_base_symbol;
20162 static rtx
20163 ix86_tls_module_base (void)
20166 if (!ix86_tls_module_base_symbol)
20168 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20169 "_TLS_MODULE_BASE_");
20170 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20171 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20174 return ix86_tls_module_base_symbol;
20177 /* Calculate the length of the memory address in the instruction
20178 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20180 int
20181 memory_address_length (rtx addr)
20183 struct ix86_address parts;
20184 rtx base, index, disp;
20188 if (GET_CODE (addr) == PRE_DEC
20189 || GET_CODE (addr) == POST_INC
20190 || GET_CODE (addr) == PRE_MODIFY
20191 || GET_CODE (addr) == POST_MODIFY)
20194 ok = ix86_decompose_address (addr, &parts);
20195 gcc_assert (ok);
20197 if (parts.base && GET_CODE (parts.base) == SUBREG)
20198 parts.base = SUBREG_REG (parts.base);
20199 if (parts.index && GET_CODE (parts.index) == SUBREG)
20200 parts.index = SUBREG_REG (parts.index);
20202 base = parts.base;
20203 index = parts.index;
20204 disp = parts.disp;
20207 /* Rule of thumb:
20208 - esp as the base always wants an index,
20209 - ebp as the base always wants a displacement,
20210 - r12 as the base always wants an index,
20211 - r13 as the base always wants a displacement. */
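/* For example, in 32-bit mode: mov (%eax),%ecx is 8b 08 (one-byte
   modrm), mov (%esp),%ecx is 8b 0c 24 (modrm plus SIB byte), and
   mov (%ebp),%ecx is 8b 4d 00 (modrm plus a zero disp8).  */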
20213 /* Register Indirect. */
20214 if (base && !index && !disp)
20216 /* esp (for its index) and ebp (for its displacement) need
20217 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20218 mode. */
20220 && (addr == arg_pointer_rtx
20221 || addr == frame_pointer_rtx
20222 || REGNO (addr) == SP_REG
20223 || REGNO (addr) == BP_REG
20224 || REGNO (addr) == R12_REG
20225 || REGNO (addr) == R13_REG))
20229 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20230 is not disp32, but disp32(%rip), so for disp32
20231 SIB byte is needed, unless print_operand_address
20232 optimizes it into disp32(%rip) or (%rip) is implied
20233 by UNSPEC. */
20234 else if (disp && !base && !index)
20241 if (GET_CODE (disp) == CONST)
20242 symbol = XEXP (disp, 0);
20243 if (GET_CODE (symbol) == PLUS
20244 && CONST_INT_P (XEXP (symbol, 1)))
20245 symbol = XEXP (symbol, 0);
20247 if (GET_CODE (symbol) != LABEL_REF
20248 && (GET_CODE (symbol) != SYMBOL_REF
20249 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20250 && (GET_CODE (symbol) != UNSPEC
20251 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20252 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20259 /* Find the length of the displacement constant. */
20262 if (base && satisfies_constraint_K (disp))
20267 /* ebp always wants a displacement. Similarly r13. */
20268 else if (base && REG_P (base)
20269 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20272 /* An index requires the two-byte modrm form.... */
20274 /* ...like esp (or r12), which always wants an index. */
20275 || base == arg_pointer_rtx
20276 || base == frame_pointer_rtx
20277 || (base && REG_P (base)
20278 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20295 /* Compute default value for "length_immediate" attribute. When SHORTFORM
20296 is set, expect that the insn has an 8-bit immediate alternative. */
20297 int
20298 ix86_attr_length_immediate_default (rtx insn, int shortform)
20302 extract_insn_cached (insn);
20303 for (i = recog_data.n_operands - 1; i >= 0; --i)
20304 if (CONSTANT_P (recog_data.operand[i]))
20306 enum attr_mode mode = get_attr_mode (insn);
20309 if (shortform && CONST_INT_P (recog_data.operand[i]))
20311 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20318 ival = trunc_int_for_mode (ival, HImode);
20321 ival = trunc_int_for_mode (ival, SImode);
20326 if (IN_RANGE (ival, -128, 127))
20343 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
20348 fatal_insn ("unknown insn mode", insn);
20353 /* Compute default value for "length_address" attribute. */
20354 int
20355 ix86_attr_length_address_default (rtx insn)
20359 if (get_attr_type (insn) == TYPE_LEA)
20361 rtx set = PATTERN (insn), addr;
20363 if (GET_CODE (set) == PARALLEL)
20364 set = XVECEXP (set, 0, 0);
20366 gcc_assert (GET_CODE (set) == SET);
20368 addr = SET_SRC (set);
20369 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20371 if (GET_CODE (addr) == ZERO_EXTEND)
20372 addr = XEXP (addr, 0);
20373 if (GET_CODE (addr) == SUBREG)
20374 addr = SUBREG_REG (addr);
20377 return memory_address_length (addr);
20380 extract_insn_cached (insn);
20381 for (i = recog_data.n_operands - 1; i >= 0; --i)
20382 if (MEM_P (recog_data.operand[i]))
20384 constrain_operands_cached (reload_completed);
20385 if (which_alternative != -1)
20387 const char *constraints = recog_data.constraints[i];
20388 int alt = which_alternative;
20390 while (*constraints == '=' || *constraints == '+')
20391 constraints++;
20392 while (alt-- > 0)
20393 while (*constraints++ != ',')
20394 ;
20395 /* Skip ignored operands. */
20396 if (*constraints == 'X')
20399 return memory_address_length (XEXP (recog_data.operand[i], 0));
20404 /* Compute default value for "length_vex" attribute. It includes
20405 the 2- or 3-byte VEX prefix and 1 opcode byte. */
20407 int
20408 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20409 int has_vex_w)
20413 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W bit
20414 requires the 3-byte VEX prefix. */
20415 if (!has_0f_opcode || has_vex_w)
20418 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
20422 extract_insn_cached (insn);
20424 for (i = recog_data.n_operands - 1; i >= 0; --i)
20425 if (REG_P (recog_data.operand[i]))
20427 /* The REX.W bit requires the 3-byte VEX prefix. */
20428 if (GET_MODE (recog_data.operand[i]) == DImode
20429 && GENERAL_REG_P (recog_data.operand[i]))
20434 /* The REX.X or REX.B bits require the 3-byte VEX prefix. */
20435 if (MEM_P (recog_data.operand[i])
20436 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
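/* For example, vaddps (%rax),%xmm1,%xmm2 fits in the 2-byte (c5) VEX
   form, while vaddps (%r8),%xmm1,%xmm2 needs the 3-byte (c4) form,
   since only the latter can carry the REX.B extension for %r8.  */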
20443 /* Return the maximum number of instructions a cpu can issue. */
20445 static int
20446 ix86_issue_rate (void)
20450 case PROCESSOR_PENTIUM:
20451 case PROCESSOR_ATOM:
20455 case PROCESSOR_PENTIUMPRO:
20456 case PROCESSOR_PENTIUM4:
20457 case PROCESSOR_ATHLON:
20459 case PROCESSOR_AMDFAM10:
20460 case PROCESSOR_NOCONA:
20461 case PROCESSOR_GENERIC32:
20462 case PROCESSOR_GENERIC64:
20463 case PROCESSOR_BDVER1:
20466 case PROCESSOR_CORE2:
20474 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
20475 set by DEP_INSN and nothing else set by DEP_INSN. */
20477 static bool
20478 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20482 /* Simplify the test for uninteresting insns. */
20483 if (insn_type != TYPE_SETCC
20484 && insn_type != TYPE_ICMOV
20485 && insn_type != TYPE_FCMOV
20486 && insn_type != TYPE_IBR)
20489 if ((set = single_set (dep_insn)) != 0)
20491 set = SET_DEST (set);
20494 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20495 && XVECLEN (PATTERN (dep_insn), 0) == 2
20496 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20497 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20499 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20500 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20505 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20508 /* This test is true if the dependent insn reads the flags but
20509 not any other potentially set register. */
20510 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20513 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20519 /* Return true iff USE_INSN has a memory address with operands set by
20520 SET_INSN. */
20522 static bool
20523 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20526 extract_insn_cached (use_insn);
20527 for (i = recog_data.n_operands - 1; i >= 0; --i)
20528 if (MEM_P (recog_data.operand[i]))
20530 rtx addr = XEXP (recog_data.operand[i], 0);
20531 return modified_in_p (addr, set_insn) != 0;
20536 static int
20537 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20539 enum attr_type insn_type, dep_insn_type;
20540 enum attr_memory memory;
20542 int dep_insn_code_number;
20544 /* Anti and output dependencies have zero cost on all CPUs. */
20545 if (REG_NOTE_KIND (link) != 0)
20548 dep_insn_code_number = recog_memoized (dep_insn);
20550 /* If we can't recognize the insns, we can't really do anything. */
20551 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20554 insn_type = get_attr_type (insn);
20555 dep_insn_type = get_attr_type (dep_insn);
20559 case PROCESSOR_PENTIUM:
20560 /* Address Generation Interlock adds a cycle of latency. */
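/* For example, in "mov %eax, %esi ; mov (%esi), %ecx" the load pays
   an extra cycle because %esi feeds address generation immediately
   after being written.  */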
20561 if (insn_type == TYPE_LEA)
20563 rtx addr = PATTERN (insn);
20565 if (GET_CODE (addr) == PARALLEL)
20566 addr = XVECEXP (addr, 0, 0);
20568 gcc_assert (GET_CODE (addr) == SET);
20570 addr = SET_SRC (addr);
20571 if (modified_in_p (addr, dep_insn))
20574 else if (ix86_agi_dependent (dep_insn, insn))
20577 /* ??? Compares pair with jump/setcc. */
20578 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20581 /* Floating point stores require the value to be ready one cycle earlier. */
20582 if (insn_type == TYPE_FMOV
20583 && get_attr_memory (insn) == MEMORY_STORE
20584 && !ix86_agi_dependent (dep_insn, insn))
20588 case PROCESSOR_PENTIUMPRO:
20589 memory = get_attr_memory (insn);
20591 /* INT->FP conversion is expensive. */
20592 if (get_attr_fp_int_src (dep_insn))
20595 /* There is one extra cycle of latency between an FP op and a store. */
20596 if (insn_type == TYPE_FMOV
20597 && (set = single_set (dep_insn)) != NULL_RTX
20598 && (set2 = single_set (insn)) != NULL_RTX
20599 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20600 && MEM_P (SET_DEST (set2)))
20603 /* Show the ability of the reorder buffer to hide the latency of a load
20604 by executing it in parallel with the previous instruction, when the
20605 previous instruction is not needed to compute the address. */
20606 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20607 && !ix86_agi_dependent (dep_insn, insn))
20609 /* Claim moves to take one cycle, as the core can issue one load
20610 at a time and the next load can start a cycle later. */
20611 if (dep_insn_type == TYPE_IMOV
20612 || dep_insn_type == TYPE_FMOV)
20620 memory = get_attr_memory (insn);
20622 /* The esp dependency is resolved before the instruction is really
20623 finished. */
20624 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20625 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20628 /* INT->FP conversion is expensive. */
20629 if (get_attr_fp_int_src (dep_insn))
20632 /* Show the ability of the reorder buffer to hide the latency of a load
20633 by executing it in parallel with the previous instruction, when the
20634 previous instruction is not needed to compute the address. */
20635 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20636 && !ix86_agi_dependent (dep_insn, insn))
20638 /* Claim moves to take one cycle, as the core can issue one load
20639 at a time and the next load can start a cycle later. */
20640 if (dep_insn_type == TYPE_IMOV
20641 || dep_insn_type == TYPE_FMOV)
20650 case PROCESSOR_ATHLON:
20652 case PROCESSOR_AMDFAM10:
20653 case PROCESSOR_BDVER1:
20654 case PROCESSOR_ATOM:
20655 case PROCESSOR_GENERIC32:
20656 case PROCESSOR_GENERIC64:
20657 memory = get_attr_memory (insn);
20659 /* Show the ability of the reorder buffer to hide the latency of a load
20660 by executing it in parallel with the previous instruction, when the
20661 previous instruction is not needed to compute the address. */
20662 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20663 && !ix86_agi_dependent (dep_insn, insn))
20665 enum attr_unit unit = get_attr_unit (insn);
20666 int loadcost = 3;
20668 /* Because of the difference between the length of integer and
20669 floating unit pipeline preparation stages, the memory operands
20670 for floating point are cheaper.
20672 ??? For Athlon the difference is most probably 2. */
20673 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20674 loadcost = 3;
20675 else
20676 loadcost = TARGET_ATHLON ? 2 : 0;
20678 if (cost >= loadcost)
20691 /* How many alternative schedules to try. This should be as wide as the
20692 scheduling freedom in the DFA, but no wider. Making this value too
20693 large results in extra work for the scheduler. */
20695 static int
20696 ia32_multipass_dfa_lookahead (void)
20700 case PROCESSOR_PENTIUM:
20703 case PROCESSOR_PENTIUMPRO:
20713 /* Compute the alignment given to a constant that is being placed in memory.
20714 EXP is the constant and ALIGN is the alignment that the object would
20715 ordinarily have.
20716 The value of this function is used instead of that alignment to align
20717 the object. */
20719 int
20720 ix86_constant_alignment (tree exp, int align)
20722 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20723 || TREE_CODE (exp) == INTEGER_CST)
20725 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20727 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20730 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20731 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20732 return BITS_PER_WORD;
20737 /* Compute the alignment for a static variable.
20738 TYPE is the data type, and ALIGN is the alignment that
20739 the object would ordinarily have. The value of this function is used
20740 instead of that alignment to align the object. */
20742 int
20743 ix86_data_alignment (tree type, int align)
20745 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20747 if (AGGREGATE_TYPE_P (type)
20748 && TYPE_SIZE (type)
20749 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20750 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20751 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20752 && align < max_align)
20755 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20756 to a 16-byte boundary. */
20759 if (AGGREGATE_TYPE_P (type)
20760 && TYPE_SIZE (type)
20761 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20762 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20763 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20767 if (TREE_CODE (type) == ARRAY_TYPE)
20769 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20771 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20774 else if (TREE_CODE (type) == COMPLEX_TYPE)
20777 if (TYPE_MODE (type) == DCmode && align < 64)
20779 if ((TYPE_MODE (type) == XCmode
20780 || TYPE_MODE (type) == TCmode) && align < 128)
20783 else if ((TREE_CODE (type) == RECORD_TYPE
20784 || TREE_CODE (type) == UNION_TYPE
20785 || TREE_CODE (type) == QUAL_UNION_TYPE)
20786 && TYPE_FIELDS (type))
20788 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20790 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20793 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20794 || TREE_CODE (type) == INTEGER_TYPE)
20796 if (TYPE_MODE (type) == DFmode && align < 64)
20798 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20805 /* Compute the alignment for a local variable or a stack slot. EXP is
20806 the data type or decl itself, MODE is the widest mode available and
20807 ALIGN is the alignment that the object would ordinarily have. The
20808 value of this macro is used instead of that alignment to align the
20809 object. */
20811 unsigned int
20812 ix86_local_alignment (tree exp, enum machine_mode mode,
20813 unsigned int align)
20817 if (exp && DECL_P (exp))
20819 type = TREE_TYPE (exp);
20828 /* Don't do dynamic stack realignment for long long objects with
20829 -mpreferred-stack-boundary=2. */
20832 && ix86_preferred_stack_boundary < 64
20833 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20834 && (!type || !TYPE_USER_ALIGN (type))
20835 && (!decl || !DECL_USER_ALIGN (decl)))
20838 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
20839 register in MODE. We will return the largest alignment of XF
20840 and DF. */
20843 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20844 align = GET_MODE_ALIGNMENT (DFmode);
20848 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20849 to a 16-byte boundary. The exact wording is:
20851 An array uses the same alignment as its elements, except that a local or
20852 global array variable of length at least 16 bytes or
20853 a C99 variable-length array variable always has alignment of at least 16 bytes.
20855 This was added to allow use of aligned SSE instructions on arrays. The
20856 rule is meant for static storage (where the compiler cannot do the
20857 analysis by itself). We follow it for automatic variables only when
20858 convenient. We fully control everything in the function being compiled,
20859 and functions from other units cannot rely on the alignment.
20861 Exclude the va_list type. It is the common case of a local array where
20862 we cannot benefit from the alignment. */
20863 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
20866 if (AGGREGATE_TYPE_P (type)
20867 && (TYPE_MAIN_VARIANT (type)
20868 != TYPE_MAIN_VARIANT (va_list_type_node))
20869 && TYPE_SIZE (type)
20870 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20871 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20872 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20875 if (TREE_CODE (type) == ARRAY_TYPE)
20877 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20879 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20882 else if (TREE_CODE (type) == COMPLEX_TYPE)
20884 if (TYPE_MODE (type) == DCmode && align < 64)
20886 if ((TYPE_MODE (type) == XCmode
20887 || TYPE_MODE (type) == TCmode) && align < 128)
20890 else if ((TREE_CODE (type) == RECORD_TYPE
20891 || TREE_CODE (type) == UNION_TYPE
20892 || TREE_CODE (type) == QUAL_UNION_TYPE)
20893 && TYPE_FIELDS (type))
20895 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20897 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20900 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20901 || TREE_CODE (type) == INTEGER_TYPE)
20904 if (TYPE_MODE (type) == DFmode && align < 64)
20906 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20912 /* Compute the minimum required alignment for dynamic stack realignment
20913 purposes for a local variable, parameter or a stack slot. EXP is
20914 the data type or decl itself, MODE is its mode and ALIGN is the
20915 alignment that the object would ordinarily have. */
20917 unsigned int
20918 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20919 unsigned int align)
20923 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20926 if (exp && DECL_P (exp))
20928 type = TREE_TYPE (exp);
20937 /* Don't do dynamic stack realignment for long long objects with
20938 -mpreferred-stack-boundary=2. */
20939 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20940 && (!type || !TYPE_USER_ALIGN (type))
20941 && (!decl || !DECL_USER_ALIGN (decl)))
20947 /* Find a location for the static chain incoming to a nested function.
20948 This is a register, unless all free registers are used by arguments. */
20950 static rtx
20951 ix86_static_chain (const_tree fndecl, bool incoming_p)
20955 if (!DECL_STATIC_CHAIN (fndecl))
20960 /* We always use R10 in 64-bit mode. */
20966 /* By default in 32-bit mode we use ECX to pass the static chain. */
20969 fntype = TREE_TYPE (fndecl);
20970 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20972 /* Fastcall functions use ecx/edx for arguments, which leaves
20973 us with EAX for the static chain. */
20976 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
20978 /* Thiscall functions use ecx for arguments, which leaves
20979 us with EAX for the static chain. */
20982 else if (ix86_function_regparm (fntype, fndecl) == 3)
20984 /* For regparm 3, we have no free call-clobbered registers in
20985 which to store the static chain. In order to implement this,
20986 we have the trampoline push the static chain to the stack.
20987 However, we can't push a value below the return address when
20988 we call the nested function directly, so we have to use an
20989 alternate entry point. For this we use ESI, and have the
20990 alternate entry point push ESI, so that things appear the
20991 same once we're executing the nested function. */
20994 if (fndecl == current_function_decl)
20995 ix86_static_chain_on_stack = true;
20996 return gen_frame_mem (SImode,
20997 plus_constant (arg_pointer_rtx, -8));
21003 return gen_rtx_REG (Pmode, regno);
21006 /* Emit RTL insns to initialize the variable parts of a trampoline.
21007 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21008 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21009 to be passed to the target function. */
21011 static void
21012 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21016 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21023 /* Depending on the static chain location, either load a register
21024 with a constant, or push the constant to the stack. All of the
21025 instructions are the same size. */
21026 chain = ix86_static_chain (fndecl, true);
21029 if (REGNO (chain) == CX_REG)
21031 else if (REGNO (chain) == AX_REG)
21034 gcc_unreachable ();
21039 mem = adjust_address (m_tramp, QImode, 0);
21040 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21042 mem = adjust_address (m_tramp, SImode, 1);
21043 emit_move_insn (mem, chain_value);
21045 /* Compute offset from the end of the jmp to the target function.
21046 In the case in which the trampoline stores the static chain on
21047 the stack, we need to skip the first insn which pushes the
21048 (call-saved) register static chain; this push is 1 byte. */
21049 disp = expand_binop (SImode, sub_optab, fnaddr,
21050 plus_constant (XEXP (m_tramp, 0),
21051 MEM_P (chain) ? 9 : 10),
21052 NULL_RTX, 1, OPTAB_DIRECT);
21054 mem = adjust_address (m_tramp, QImode, 5);
21055 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21057 mem = adjust_address (m_tramp, SImode, 6);
21058 emit_move_insn (mem, disp);
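/* The 32-bit trampoline written above is 10 bytes:
     byte 0:    one-byte opcode (a mov-immediate into the static-chain
                register, or a push-immediate when the chain lives on
                the stack)
     bytes 1-4: CHAIN_VALUE
     byte 5:    0xe9 (jmp rel32)
     bytes 6-9: displacement to the target function  */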
21064 /* Load the function address into r11. Try to load the address using
21065 the shorter movl instead of movabs. We may want to support
21066 movq for kernel mode, but the kernel does not use trampolines at
21067 the moment. */
21068 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21070 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21072 mem = adjust_address (m_tramp, HImode, offset);
21073 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21075 mem = adjust_address (m_tramp, SImode, offset + 2);
21076 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21081 mem = adjust_address (m_tramp, HImode, offset);
21082 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21084 mem = adjust_address (m_tramp, DImode, offset + 2);
21085 emit_move_insn (mem, fnaddr);
21089 /* Load static chain using movabs to r10. */
21090 mem = adjust_address (m_tramp, HImode, offset);
21091 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21093 mem = adjust_address (m_tramp, DImode, offset + 2);
21094 emit_move_insn (mem, chain_value);
21097 /* Jump to r11; the last (unused) byte is a nop, only there to
21098 pad the write out to a single 32-bit store. */
21099 mem = adjust_address (m_tramp, SImode, offset);
21100 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
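/* The 64-bit trampoline written above decodes as:
     41 bb imm32   movl   $fnaddr, %r11d  (or 49 bb imm64, movabs)
     49 ba imm64   movabs $chain, %r10
     49 ff e3      jmp    *%r11
     90            nop (pads the final write to 32 bits)  */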
21103 gcc_assert (offset <= TRAMPOLINE_SIZE);
21106 #ifdef ENABLE_EXECUTE_STACK
21107 #ifdef CHECK_EXECUTE_STACK_ENABLED
21108 if (CHECK_EXECUTE_STACK_ENABLED)
21109 #endif
21110 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21111 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21112 #endif
21113 }
21115 /* The following file contains several enumerations and data structures
21116 built from the definitions in i386-builtin-types.def. */
21118 #include "i386-builtin-types.inc"
21120 /* Table for the ix86 builtin non-function types. */
21121 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21123 /* Retrieve an element from the above table, building some of
21124 the types lazily. */
21126 static tree
21127 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21129 unsigned int index;
21132 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21134 type = ix86_builtin_type_tab[(int) tcode];
21135 if (type)
21136 return type;
21138 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21139 if (tcode <= IX86_BT_LAST_VECT)
21141 enum machine_mode mode;
21143 index = tcode - IX86_BT_LAST_PRIM - 1;
21144 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21145 mode = ix86_builtin_type_vect_mode[index];
21147 type = build_vector_type_for_mode (itype, mode);
21153 index = tcode - IX86_BT_LAST_VECT - 1;
21154 if (tcode <= IX86_BT_LAST_PTR)
21155 quals = TYPE_UNQUALIFIED;
21157 quals = TYPE_QUAL_CONST;
21159 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21160 if (quals != TYPE_UNQUALIFIED)
21161 itype = build_qualified_type (itype, quals);
21163 type = build_pointer_type (itype);
21166 ix86_builtin_type_tab[(int) tcode] = type;
21170 /* Table for the ix86 builtin function types. */
21171 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21173 /* Retrieve an element from the above table, building some of
21174 the types lazily. */
21176 static tree
21177 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21181 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21183 type = ix86_builtin_func_type_tab[(int) tcode];
21184 if (type)
21185 return type;
21187 if (tcode <= IX86_BT_LAST_FUNC)
21189 unsigned start = ix86_builtin_func_start[(int) tcode];
21190 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21191 tree rtype, atype, args = void_list_node;
21194 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21195 for (i = after - 1; i > start; --i)
21197 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21198 args = tree_cons (NULL, atype, args);
21201 type = build_function_type (rtype, args);
21205 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21206 enum ix86_builtin_func_type icode;
21208 icode = ix86_builtin_func_alias_base[index];
21209 type = ix86_get_builtin_func_type (icode);
21212 ix86_builtin_func_type_tab[(int) tcode] = type;
21217 /* Codes for all the SSE/MMX builtins. */
21218 enum ix86_builtins
21219 {
21220 IX86_BUILTIN_ADDPS,
21221 IX86_BUILTIN_ADDSS,
21222 IX86_BUILTIN_DIVPS,
21223 IX86_BUILTIN_DIVSS,
21224 IX86_BUILTIN_MULPS,
21225 IX86_BUILTIN_MULSS,
21226 IX86_BUILTIN_SUBPS,
21227 IX86_BUILTIN_SUBSS,
21229 IX86_BUILTIN_CMPEQPS,
21230 IX86_BUILTIN_CMPLTPS,
21231 IX86_BUILTIN_CMPLEPS,
21232 IX86_BUILTIN_CMPGTPS,
21233 IX86_BUILTIN_CMPGEPS,
21234 IX86_BUILTIN_CMPNEQPS,
21235 IX86_BUILTIN_CMPNLTPS,
21236 IX86_BUILTIN_CMPNLEPS,
21237 IX86_BUILTIN_CMPNGTPS,
21238 IX86_BUILTIN_CMPNGEPS,
21239 IX86_BUILTIN_CMPORDPS,
21240 IX86_BUILTIN_CMPUNORDPS,
21241 IX86_BUILTIN_CMPEQSS,
21242 IX86_BUILTIN_CMPLTSS,
21243 IX86_BUILTIN_CMPLESS,
21244 IX86_BUILTIN_CMPNEQSS,
21245 IX86_BUILTIN_CMPNLTSS,
21246 IX86_BUILTIN_CMPNLESS,
21247 IX86_BUILTIN_CMPNGTSS,
21248 IX86_BUILTIN_CMPNGESS,
21249 IX86_BUILTIN_CMPORDSS,
21250 IX86_BUILTIN_CMPUNORDSS,
21252 IX86_BUILTIN_COMIEQSS,
21253 IX86_BUILTIN_COMILTSS,
21254 IX86_BUILTIN_COMILESS,
21255 IX86_BUILTIN_COMIGTSS,
21256 IX86_BUILTIN_COMIGESS,
21257 IX86_BUILTIN_COMINEQSS,
21258 IX86_BUILTIN_UCOMIEQSS,
21259 IX86_BUILTIN_UCOMILTSS,
21260 IX86_BUILTIN_UCOMILESS,
21261 IX86_BUILTIN_UCOMIGTSS,
21262 IX86_BUILTIN_UCOMIGESS,
21263 IX86_BUILTIN_UCOMINEQSS,
21265 IX86_BUILTIN_CVTPI2PS,
21266 IX86_BUILTIN_CVTPS2PI,
21267 IX86_BUILTIN_CVTSI2SS,
21268 IX86_BUILTIN_CVTSI642SS,
21269 IX86_BUILTIN_CVTSS2SI,
21270 IX86_BUILTIN_CVTSS2SI64,
21271 IX86_BUILTIN_CVTTPS2PI,
21272 IX86_BUILTIN_CVTTSS2SI,
21273 IX86_BUILTIN_CVTTSS2SI64,
21275 IX86_BUILTIN_MAXPS,
21276 IX86_BUILTIN_MAXSS,
21277 IX86_BUILTIN_MINPS,
21278 IX86_BUILTIN_MINSS,
21280 IX86_BUILTIN_LOADUPS,
21281 IX86_BUILTIN_STOREUPS,
21282 IX86_BUILTIN_MOVSS,
21284 IX86_BUILTIN_MOVHLPS,
21285 IX86_BUILTIN_MOVLHPS,
21286 IX86_BUILTIN_LOADHPS,
21287 IX86_BUILTIN_LOADLPS,
21288 IX86_BUILTIN_STOREHPS,
21289 IX86_BUILTIN_STORELPS,
21291 IX86_BUILTIN_MASKMOVQ,
21292 IX86_BUILTIN_MOVMSKPS,
21293 IX86_BUILTIN_PMOVMSKB,
21295 IX86_BUILTIN_MOVNTPS,
21296 IX86_BUILTIN_MOVNTQ,
21298 IX86_BUILTIN_LOADDQU,
21299 IX86_BUILTIN_STOREDQU,
21301 IX86_BUILTIN_PACKSSWB,
21302 IX86_BUILTIN_PACKSSDW,
21303 IX86_BUILTIN_PACKUSWB,
21305 IX86_BUILTIN_PADDB,
21306 IX86_BUILTIN_PADDW,
21307 IX86_BUILTIN_PADDD,
21308 IX86_BUILTIN_PADDQ,
21309 IX86_BUILTIN_PADDSB,
21310 IX86_BUILTIN_PADDSW,
21311 IX86_BUILTIN_PADDUSB,
21312 IX86_BUILTIN_PADDUSW,
21313 IX86_BUILTIN_PSUBB,
21314 IX86_BUILTIN_PSUBW,
21315 IX86_BUILTIN_PSUBD,
21316 IX86_BUILTIN_PSUBQ,
21317 IX86_BUILTIN_PSUBSB,
21318 IX86_BUILTIN_PSUBSW,
21319 IX86_BUILTIN_PSUBUSB,
21320 IX86_BUILTIN_PSUBUSW,
21323 IX86_BUILTIN_PANDN,
21327 IX86_BUILTIN_PAVGB,
21328 IX86_BUILTIN_PAVGW,
21330 IX86_BUILTIN_PCMPEQB,
21331 IX86_BUILTIN_PCMPEQW,
21332 IX86_BUILTIN_PCMPEQD,
21333 IX86_BUILTIN_PCMPGTB,
21334 IX86_BUILTIN_PCMPGTW,
21335 IX86_BUILTIN_PCMPGTD,
21337 IX86_BUILTIN_PMADDWD,
21339 IX86_BUILTIN_PMAXSW,
21340 IX86_BUILTIN_PMAXUB,
21341 IX86_BUILTIN_PMINSW,
21342 IX86_BUILTIN_PMINUB,
21344 IX86_BUILTIN_PMULHUW,
21345 IX86_BUILTIN_PMULHW,
21346 IX86_BUILTIN_PMULLW,
21348 IX86_BUILTIN_PSADBW,
21349 IX86_BUILTIN_PSHUFW,
21351 IX86_BUILTIN_PSLLW,
21352 IX86_BUILTIN_PSLLD,
21353 IX86_BUILTIN_PSLLQ,
21354 IX86_BUILTIN_PSRAW,
21355 IX86_BUILTIN_PSRAD,
21356 IX86_BUILTIN_PSRLW,
21357 IX86_BUILTIN_PSRLD,
21358 IX86_BUILTIN_PSRLQ,
21359 IX86_BUILTIN_PSLLWI,
21360 IX86_BUILTIN_PSLLDI,
21361 IX86_BUILTIN_PSLLQI,
21362 IX86_BUILTIN_PSRAWI,
21363 IX86_BUILTIN_PSRADI,
21364 IX86_BUILTIN_PSRLWI,
21365 IX86_BUILTIN_PSRLDI,
21366 IX86_BUILTIN_PSRLQI,
21368 IX86_BUILTIN_PUNPCKHBW,
21369 IX86_BUILTIN_PUNPCKHWD,
21370 IX86_BUILTIN_PUNPCKHDQ,
21371 IX86_BUILTIN_PUNPCKLBW,
21372 IX86_BUILTIN_PUNPCKLWD,
21373 IX86_BUILTIN_PUNPCKLDQ,
21375 IX86_BUILTIN_SHUFPS,
21377 IX86_BUILTIN_RCPPS,
21378 IX86_BUILTIN_RCPSS,
21379 IX86_BUILTIN_RSQRTPS,
21380 IX86_BUILTIN_RSQRTPS_NR,
21381 IX86_BUILTIN_RSQRTSS,
21382 IX86_BUILTIN_RSQRTF,
21383 IX86_BUILTIN_SQRTPS,
21384 IX86_BUILTIN_SQRTPS_NR,
21385 IX86_BUILTIN_SQRTSS,
21387 IX86_BUILTIN_UNPCKHPS,
21388 IX86_BUILTIN_UNPCKLPS,
21390 IX86_BUILTIN_ANDPS,
21391 IX86_BUILTIN_ANDNPS,
21393 IX86_BUILTIN_XORPS,
21396 IX86_BUILTIN_LDMXCSR,
21397 IX86_BUILTIN_STMXCSR,
21398 IX86_BUILTIN_SFENCE,
21400 /* 3DNow! Original */
21401 IX86_BUILTIN_FEMMS,
21402 IX86_BUILTIN_PAVGUSB,
21403 IX86_BUILTIN_PF2ID,
21404 IX86_BUILTIN_PFACC,
21405 IX86_BUILTIN_PFADD,
21406 IX86_BUILTIN_PFCMPEQ,
21407 IX86_BUILTIN_PFCMPGE,
21408 IX86_BUILTIN_PFCMPGT,
21409 IX86_BUILTIN_PFMAX,
21410 IX86_BUILTIN_PFMIN,
21411 IX86_BUILTIN_PFMUL,
21412 IX86_BUILTIN_PFRCP,
21413 IX86_BUILTIN_PFRCPIT1,
21414 IX86_BUILTIN_PFRCPIT2,
21415 IX86_BUILTIN_PFRSQIT1,
21416 IX86_BUILTIN_PFRSQRT,
21417 IX86_BUILTIN_PFSUB,
21418 IX86_BUILTIN_PFSUBR,
21419 IX86_BUILTIN_PI2FD,
21420 IX86_BUILTIN_PMULHRW,
21422 /* 3DNow! Athlon Extensions */
21423 IX86_BUILTIN_PF2IW,
21424 IX86_BUILTIN_PFNACC,
21425 IX86_BUILTIN_PFPNACC,
21426 IX86_BUILTIN_PI2FW,
21427 IX86_BUILTIN_PSWAPDSI,
21428 IX86_BUILTIN_PSWAPDSF,
21431 IX86_BUILTIN_ADDPD,
21432 IX86_BUILTIN_ADDSD,
21433 IX86_BUILTIN_DIVPD,
21434 IX86_BUILTIN_DIVSD,
21435 IX86_BUILTIN_MULPD,
21436 IX86_BUILTIN_MULSD,
21437 IX86_BUILTIN_SUBPD,
21438 IX86_BUILTIN_SUBSD,
21440 IX86_BUILTIN_CMPEQPD,
21441 IX86_BUILTIN_CMPLTPD,
21442 IX86_BUILTIN_CMPLEPD,
21443 IX86_BUILTIN_CMPGTPD,
21444 IX86_BUILTIN_CMPGEPD,
21445 IX86_BUILTIN_CMPNEQPD,
21446 IX86_BUILTIN_CMPNLTPD,
21447 IX86_BUILTIN_CMPNLEPD,
21448 IX86_BUILTIN_CMPNGTPD,
21449 IX86_BUILTIN_CMPNGEPD,
21450 IX86_BUILTIN_CMPORDPD,
21451 IX86_BUILTIN_CMPUNORDPD,
21452 IX86_BUILTIN_CMPEQSD,
21453 IX86_BUILTIN_CMPLTSD,
21454 IX86_BUILTIN_CMPLESD,
21455 IX86_BUILTIN_CMPNEQSD,
21456 IX86_BUILTIN_CMPNLTSD,
21457 IX86_BUILTIN_CMPNLESD,
21458 IX86_BUILTIN_CMPORDSD,
21459 IX86_BUILTIN_CMPUNORDSD,
21461 IX86_BUILTIN_COMIEQSD,
21462 IX86_BUILTIN_COMILTSD,
21463 IX86_BUILTIN_COMILESD,
21464 IX86_BUILTIN_COMIGTSD,
21465 IX86_BUILTIN_COMIGESD,
21466 IX86_BUILTIN_COMINEQSD,
21467 IX86_BUILTIN_UCOMIEQSD,
21468 IX86_BUILTIN_UCOMILTSD,
21469 IX86_BUILTIN_UCOMILESD,
21470 IX86_BUILTIN_UCOMIGTSD,
21471 IX86_BUILTIN_UCOMIGESD,
21472 IX86_BUILTIN_UCOMINEQSD,
21474 IX86_BUILTIN_MAXPD,
21475 IX86_BUILTIN_MAXSD,
21476 IX86_BUILTIN_MINPD,
21477 IX86_BUILTIN_MINSD,
21479 IX86_BUILTIN_ANDPD,
21480 IX86_BUILTIN_ANDNPD,
21482 IX86_BUILTIN_XORPD,
21484 IX86_BUILTIN_SQRTPD,
21485 IX86_BUILTIN_SQRTSD,
21487 IX86_BUILTIN_UNPCKHPD,
21488 IX86_BUILTIN_UNPCKLPD,
21490 IX86_BUILTIN_SHUFPD,
21492 IX86_BUILTIN_LOADUPD,
21493 IX86_BUILTIN_STOREUPD,
21494 IX86_BUILTIN_MOVSD,
21496 IX86_BUILTIN_LOADHPD,
21497 IX86_BUILTIN_LOADLPD,
21499 IX86_BUILTIN_CVTDQ2PD,
21500 IX86_BUILTIN_CVTDQ2PS,
21502 IX86_BUILTIN_CVTPD2DQ,
21503 IX86_BUILTIN_CVTPD2PI,
21504 IX86_BUILTIN_CVTPD2PS,
21505 IX86_BUILTIN_CVTTPD2DQ,
21506 IX86_BUILTIN_CVTTPD2PI,
21508 IX86_BUILTIN_CVTPI2PD,
21509 IX86_BUILTIN_CVTSI2SD,
21510 IX86_BUILTIN_CVTSI642SD,
21512 IX86_BUILTIN_CVTSD2SI,
21513 IX86_BUILTIN_CVTSD2SI64,
21514 IX86_BUILTIN_CVTSD2SS,
21515 IX86_BUILTIN_CVTSS2SD,
21516 IX86_BUILTIN_CVTTSD2SI,
21517 IX86_BUILTIN_CVTTSD2SI64,
21519 IX86_BUILTIN_CVTPS2DQ,
21520 IX86_BUILTIN_CVTPS2PD,
21521 IX86_BUILTIN_CVTTPS2DQ,
21523 IX86_BUILTIN_MOVNTI,
21524 IX86_BUILTIN_MOVNTPD,
21525 IX86_BUILTIN_MOVNTDQ,
21527 IX86_BUILTIN_MOVQ128,
21530 IX86_BUILTIN_MASKMOVDQU,
21531 IX86_BUILTIN_MOVMSKPD,
21532 IX86_BUILTIN_PMOVMSKB128,
21534 IX86_BUILTIN_PACKSSWB128,
21535 IX86_BUILTIN_PACKSSDW128,
21536 IX86_BUILTIN_PACKUSWB128,
21538 IX86_BUILTIN_PADDB128,
21539 IX86_BUILTIN_PADDW128,
21540 IX86_BUILTIN_PADDD128,
21541 IX86_BUILTIN_PADDQ128,
21542 IX86_BUILTIN_PADDSB128,
21543 IX86_BUILTIN_PADDSW128,
21544 IX86_BUILTIN_PADDUSB128,
21545 IX86_BUILTIN_PADDUSW128,
21546 IX86_BUILTIN_PSUBB128,
21547 IX86_BUILTIN_PSUBW128,
21548 IX86_BUILTIN_PSUBD128,
21549 IX86_BUILTIN_PSUBQ128,
21550 IX86_BUILTIN_PSUBSB128,
21551 IX86_BUILTIN_PSUBSW128,
21552 IX86_BUILTIN_PSUBUSB128,
21553 IX86_BUILTIN_PSUBUSW128,
21555 IX86_BUILTIN_PAND128,
21556 IX86_BUILTIN_PANDN128,
21557 IX86_BUILTIN_POR128,
21558 IX86_BUILTIN_PXOR128,
21560 IX86_BUILTIN_PAVGB128,
21561 IX86_BUILTIN_PAVGW128,
21563 IX86_BUILTIN_PCMPEQB128,
21564 IX86_BUILTIN_PCMPEQW128,
21565 IX86_BUILTIN_PCMPEQD128,
21566 IX86_BUILTIN_PCMPGTB128,
21567 IX86_BUILTIN_PCMPGTW128,
21568 IX86_BUILTIN_PCMPGTD128,
21570 IX86_BUILTIN_PMADDWD128,
21572 IX86_BUILTIN_PMAXSW128,
21573 IX86_BUILTIN_PMAXUB128,
21574 IX86_BUILTIN_PMINSW128,
21575 IX86_BUILTIN_PMINUB128,
21577 IX86_BUILTIN_PMULUDQ,
21578 IX86_BUILTIN_PMULUDQ128,
21579 IX86_BUILTIN_PMULHUW128,
21580 IX86_BUILTIN_PMULHW128,
21581 IX86_BUILTIN_PMULLW128,
21583 IX86_BUILTIN_PSADBW128,
21584 IX86_BUILTIN_PSHUFHW,
21585 IX86_BUILTIN_PSHUFLW,
21586 IX86_BUILTIN_PSHUFD,
21588 IX86_BUILTIN_PSLLDQI128,
21589 IX86_BUILTIN_PSLLWI128,
21590 IX86_BUILTIN_PSLLDI128,
21591 IX86_BUILTIN_PSLLQI128,
21592 IX86_BUILTIN_PSRAWI128,
21593 IX86_BUILTIN_PSRADI128,
21594 IX86_BUILTIN_PSRLDQI128,
21595 IX86_BUILTIN_PSRLWI128,
21596 IX86_BUILTIN_PSRLDI128,
21597 IX86_BUILTIN_PSRLQI128,
21599 IX86_BUILTIN_PSLLDQ128,
21600 IX86_BUILTIN_PSLLW128,
21601 IX86_BUILTIN_PSLLD128,
21602 IX86_BUILTIN_PSLLQ128,
21603 IX86_BUILTIN_PSRAW128,
21604 IX86_BUILTIN_PSRAD128,
21605 IX86_BUILTIN_PSRLW128,
21606 IX86_BUILTIN_PSRLD128,
21607 IX86_BUILTIN_PSRLQ128,
21609 IX86_BUILTIN_PUNPCKHBW128,
21610 IX86_BUILTIN_PUNPCKHWD128,
21611 IX86_BUILTIN_PUNPCKHDQ128,
21612 IX86_BUILTIN_PUNPCKHQDQ128,
21613 IX86_BUILTIN_PUNPCKLBW128,
21614 IX86_BUILTIN_PUNPCKLWD128,
21615 IX86_BUILTIN_PUNPCKLDQ128,
21616 IX86_BUILTIN_PUNPCKLQDQ128,
21618 IX86_BUILTIN_CLFLUSH,
21619 IX86_BUILTIN_MFENCE,
21620 IX86_BUILTIN_LFENCE,
21622 IX86_BUILTIN_BSRSI,
21623 IX86_BUILTIN_BSRDI,
21624 IX86_BUILTIN_RDPMC,
21625 IX86_BUILTIN_RDTSC,
21626 IX86_BUILTIN_RDTSCP,
21627 IX86_BUILTIN_ROLQI,
21628 IX86_BUILTIN_ROLHI,
21629 IX86_BUILTIN_RORQI,
21630 IX86_BUILTIN_RORHI,
21633 IX86_BUILTIN_ADDSUBPS,
21634 IX86_BUILTIN_HADDPS,
21635 IX86_BUILTIN_HSUBPS,
21636 IX86_BUILTIN_MOVSHDUP,
21637 IX86_BUILTIN_MOVSLDUP,
21638 IX86_BUILTIN_ADDSUBPD,
21639 IX86_BUILTIN_HADDPD,
21640 IX86_BUILTIN_HSUBPD,
21641 IX86_BUILTIN_LDDQU,
21643 IX86_BUILTIN_MONITOR,
21644 IX86_BUILTIN_MWAIT,
21647 IX86_BUILTIN_PHADDW,
21648 IX86_BUILTIN_PHADDD,
21649 IX86_BUILTIN_PHADDSW,
21650 IX86_BUILTIN_PHSUBW,
21651 IX86_BUILTIN_PHSUBD,
21652 IX86_BUILTIN_PHSUBSW,
21653 IX86_BUILTIN_PMADDUBSW,
21654 IX86_BUILTIN_PMULHRSW,
21655 IX86_BUILTIN_PSHUFB,
21656 IX86_BUILTIN_PSIGNB,
21657 IX86_BUILTIN_PSIGNW,
21658 IX86_BUILTIN_PSIGND,
21659 IX86_BUILTIN_PALIGNR,
21660 IX86_BUILTIN_PABSB,
21661 IX86_BUILTIN_PABSW,
21662 IX86_BUILTIN_PABSD,
21664 IX86_BUILTIN_PHADDW128,
21665 IX86_BUILTIN_PHADDD128,
21666 IX86_BUILTIN_PHADDSW128,
21667 IX86_BUILTIN_PHSUBW128,
21668 IX86_BUILTIN_PHSUBD128,
21669 IX86_BUILTIN_PHSUBSW128,
21670 IX86_BUILTIN_PMADDUBSW128,
21671 IX86_BUILTIN_PMULHRSW128,
21672 IX86_BUILTIN_PSHUFB128,
21673 IX86_BUILTIN_PSIGNB128,
21674 IX86_BUILTIN_PSIGNW128,
21675 IX86_BUILTIN_PSIGND128,
21676 IX86_BUILTIN_PALIGNR128,
21677 IX86_BUILTIN_PABSB128,
21678 IX86_BUILTIN_PABSW128,
21679 IX86_BUILTIN_PABSD128,
21681 /* AMDFAM10 - SSE4A New Instructions. */
21682 IX86_BUILTIN_MOVNTSD,
21683 IX86_BUILTIN_MOVNTSS,
21684 IX86_BUILTIN_EXTRQI,
21685 IX86_BUILTIN_EXTRQ,
21686 IX86_BUILTIN_INSERTQI,
21687 IX86_BUILTIN_INSERTQ,
21690 IX86_BUILTIN_BLENDPD,
21691 IX86_BUILTIN_BLENDPS,
21692 IX86_BUILTIN_BLENDVPD,
21693 IX86_BUILTIN_BLENDVPS,
21694 IX86_BUILTIN_PBLENDVB128,
21695 IX86_BUILTIN_PBLENDW128,
21700 IX86_BUILTIN_INSERTPS128,
21702 IX86_BUILTIN_MOVNTDQA,
21703 IX86_BUILTIN_MPSADBW128,
21704 IX86_BUILTIN_PACKUSDW128,
21705 IX86_BUILTIN_PCMPEQQ,
21706 IX86_BUILTIN_PHMINPOSUW128,
21708 IX86_BUILTIN_PMAXSB128,
21709 IX86_BUILTIN_PMAXSD128,
21710 IX86_BUILTIN_PMAXUD128,
21711 IX86_BUILTIN_PMAXUW128,
21713 IX86_BUILTIN_PMINSB128,
21714 IX86_BUILTIN_PMINSD128,
21715 IX86_BUILTIN_PMINUD128,
21716 IX86_BUILTIN_PMINUW128,
21718 IX86_BUILTIN_PMOVSXBW128,
21719 IX86_BUILTIN_PMOVSXBD128,
21720 IX86_BUILTIN_PMOVSXBQ128,
21721 IX86_BUILTIN_PMOVSXWD128,
21722 IX86_BUILTIN_PMOVSXWQ128,
21723 IX86_BUILTIN_PMOVSXDQ128,
21725 IX86_BUILTIN_PMOVZXBW128,
21726 IX86_BUILTIN_PMOVZXBD128,
21727 IX86_BUILTIN_PMOVZXBQ128,
21728 IX86_BUILTIN_PMOVZXWD128,
21729 IX86_BUILTIN_PMOVZXWQ128,
21730 IX86_BUILTIN_PMOVZXDQ128,
21732 IX86_BUILTIN_PMULDQ128,
21733 IX86_BUILTIN_PMULLD128,
21735 IX86_BUILTIN_ROUNDPD,
21736 IX86_BUILTIN_ROUNDPS,
21737 IX86_BUILTIN_ROUNDSD,
21738 IX86_BUILTIN_ROUNDSS,
21740 IX86_BUILTIN_PTESTZ,
21741 IX86_BUILTIN_PTESTC,
21742 IX86_BUILTIN_PTESTNZC,
21744 IX86_BUILTIN_VEC_INIT_V2SI,
21745 IX86_BUILTIN_VEC_INIT_V4HI,
21746 IX86_BUILTIN_VEC_INIT_V8QI,
21747 IX86_BUILTIN_VEC_EXT_V2DF,
21748 IX86_BUILTIN_VEC_EXT_V2DI,
21749 IX86_BUILTIN_VEC_EXT_V4SF,
21750 IX86_BUILTIN_VEC_EXT_V4SI,
21751 IX86_BUILTIN_VEC_EXT_V8HI,
21752 IX86_BUILTIN_VEC_EXT_V2SI,
21753 IX86_BUILTIN_VEC_EXT_V4HI,
21754 IX86_BUILTIN_VEC_EXT_V16QI,
21755 IX86_BUILTIN_VEC_SET_V2DI,
21756 IX86_BUILTIN_VEC_SET_V4SF,
21757 IX86_BUILTIN_VEC_SET_V4SI,
21758 IX86_BUILTIN_VEC_SET_V8HI,
21759 IX86_BUILTIN_VEC_SET_V4HI,
21760 IX86_BUILTIN_VEC_SET_V16QI,
21762 IX86_BUILTIN_VEC_PACK_SFIX,
21765 IX86_BUILTIN_CRC32QI,
21766 IX86_BUILTIN_CRC32HI,
21767 IX86_BUILTIN_CRC32SI,
21768 IX86_BUILTIN_CRC32DI,
21770 IX86_BUILTIN_PCMPESTRI128,
21771 IX86_BUILTIN_PCMPESTRM128,
21772 IX86_BUILTIN_PCMPESTRA128,
21773 IX86_BUILTIN_PCMPESTRC128,
21774 IX86_BUILTIN_PCMPESTRO128,
21775 IX86_BUILTIN_PCMPESTRS128,
21776 IX86_BUILTIN_PCMPESTRZ128,
21777 IX86_BUILTIN_PCMPISTRI128,
21778 IX86_BUILTIN_PCMPISTRM128,
21779 IX86_BUILTIN_PCMPISTRA128,
21780 IX86_BUILTIN_PCMPISTRC128,
21781 IX86_BUILTIN_PCMPISTRO128,
21782 IX86_BUILTIN_PCMPISTRS128,
21783 IX86_BUILTIN_PCMPISTRZ128,
21785 IX86_BUILTIN_PCMPGTQ,
21787 /* AES instructions */
21788 IX86_BUILTIN_AESENC128,
21789 IX86_BUILTIN_AESENCLAST128,
21790 IX86_BUILTIN_AESDEC128,
21791 IX86_BUILTIN_AESDECLAST128,
21792 IX86_BUILTIN_AESIMC128,
21793 IX86_BUILTIN_AESKEYGENASSIST128,
21795 /* PCLMUL instruction */
21796 IX86_BUILTIN_PCLMULQDQ128,
21799 IX86_BUILTIN_ADDPD256,
21800 IX86_BUILTIN_ADDPS256,
21801 IX86_BUILTIN_ADDSUBPD256,
21802 IX86_BUILTIN_ADDSUBPS256,
21803 IX86_BUILTIN_ANDPD256,
21804 IX86_BUILTIN_ANDPS256,
21805 IX86_BUILTIN_ANDNPD256,
21806 IX86_BUILTIN_ANDNPS256,
21807 IX86_BUILTIN_BLENDPD256,
21808 IX86_BUILTIN_BLENDPS256,
21809 IX86_BUILTIN_BLENDVPD256,
21810 IX86_BUILTIN_BLENDVPS256,
21811 IX86_BUILTIN_DIVPD256,
21812 IX86_BUILTIN_DIVPS256,
21813 IX86_BUILTIN_DPPS256,
21814 IX86_BUILTIN_HADDPD256,
21815 IX86_BUILTIN_HADDPS256,
21816 IX86_BUILTIN_HSUBPD256,
21817 IX86_BUILTIN_HSUBPS256,
21818 IX86_BUILTIN_MAXPD256,
21819 IX86_BUILTIN_MAXPS256,
21820 IX86_BUILTIN_MINPD256,
21821 IX86_BUILTIN_MINPS256,
21822 IX86_BUILTIN_MULPD256,
21823 IX86_BUILTIN_MULPS256,
21824 IX86_BUILTIN_ORPD256,
21825 IX86_BUILTIN_ORPS256,
21826 IX86_BUILTIN_SHUFPD256,
21827 IX86_BUILTIN_SHUFPS256,
21828 IX86_BUILTIN_SUBPD256,
21829 IX86_BUILTIN_SUBPS256,
21830 IX86_BUILTIN_XORPD256,
21831 IX86_BUILTIN_XORPS256,
21832 IX86_BUILTIN_CMPSD,
21833 IX86_BUILTIN_CMPSS,
21834 IX86_BUILTIN_CMPPD,
21835 IX86_BUILTIN_CMPPS,
21836 IX86_BUILTIN_CMPPD256,
21837 IX86_BUILTIN_CMPPS256,
21838 IX86_BUILTIN_CVTDQ2PD256,
21839 IX86_BUILTIN_CVTDQ2PS256,
21840 IX86_BUILTIN_CVTPD2PS256,
21841 IX86_BUILTIN_CVTPS2DQ256,
21842 IX86_BUILTIN_CVTPS2PD256,
21843 IX86_BUILTIN_CVTTPD2DQ256,
21844 IX86_BUILTIN_CVTPD2DQ256,
21845 IX86_BUILTIN_CVTTPS2DQ256,
21846 IX86_BUILTIN_EXTRACTF128PD256,
21847 IX86_BUILTIN_EXTRACTF128PS256,
21848 IX86_BUILTIN_EXTRACTF128SI256,
21849 IX86_BUILTIN_VZEROALL,
21850 IX86_BUILTIN_VZEROUPPER,
21851 IX86_BUILTIN_VPERMILVARPD,
21852 IX86_BUILTIN_VPERMILVARPS,
21853 IX86_BUILTIN_VPERMILVARPD256,
21854 IX86_BUILTIN_VPERMILVARPS256,
21855 IX86_BUILTIN_VPERMILPD,
21856 IX86_BUILTIN_VPERMILPS,
21857 IX86_BUILTIN_VPERMILPD256,
21858 IX86_BUILTIN_VPERMILPS256,
21859 IX86_BUILTIN_VPERMIL2PD,
21860 IX86_BUILTIN_VPERMIL2PS,
21861 IX86_BUILTIN_VPERMIL2PD256,
21862 IX86_BUILTIN_VPERMIL2PS256,
21863 IX86_BUILTIN_VPERM2F128PD256,
21864 IX86_BUILTIN_VPERM2F128PS256,
21865 IX86_BUILTIN_VPERM2F128SI256,
21866 IX86_BUILTIN_VBROADCASTSS,
21867 IX86_BUILTIN_VBROADCASTSD256,
21868 IX86_BUILTIN_VBROADCASTSS256,
21869 IX86_BUILTIN_VBROADCASTPD256,
21870 IX86_BUILTIN_VBROADCASTPS256,
21871 IX86_BUILTIN_VINSERTF128PD256,
21872 IX86_BUILTIN_VINSERTF128PS256,
21873 IX86_BUILTIN_VINSERTF128SI256,
21874 IX86_BUILTIN_LOADUPD256,
21875 IX86_BUILTIN_LOADUPS256,
21876 IX86_BUILTIN_STOREUPD256,
21877 IX86_BUILTIN_STOREUPS256,
21878 IX86_BUILTIN_LDDQU256,
21879 IX86_BUILTIN_MOVNTDQ256,
21880 IX86_BUILTIN_MOVNTPD256,
21881 IX86_BUILTIN_MOVNTPS256,
21882 IX86_BUILTIN_LOADDQU256,
21883 IX86_BUILTIN_STOREDQU256,
21884 IX86_BUILTIN_MASKLOADPD,
21885 IX86_BUILTIN_MASKLOADPS,
21886 IX86_BUILTIN_MASKSTOREPD,
21887 IX86_BUILTIN_MASKSTOREPS,
21888 IX86_BUILTIN_MASKLOADPD256,
21889 IX86_BUILTIN_MASKLOADPS256,
21890 IX86_BUILTIN_MASKSTOREPD256,
21891 IX86_BUILTIN_MASKSTOREPS256,
21892 IX86_BUILTIN_MOVSHDUP256,
21893 IX86_BUILTIN_MOVSLDUP256,
21894 IX86_BUILTIN_MOVDDUP256,
21896 IX86_BUILTIN_SQRTPD256,
21897 IX86_BUILTIN_SQRTPS256,
21898 IX86_BUILTIN_SQRTPS_NR256,
21899 IX86_BUILTIN_RSQRTPS256,
21900 IX86_BUILTIN_RSQRTPS_NR256,
21902 IX86_BUILTIN_RCPPS256,
21904 IX86_BUILTIN_ROUNDPD256,
21905 IX86_BUILTIN_ROUNDPS256,
21907 IX86_BUILTIN_UNPCKHPD256,
21908 IX86_BUILTIN_UNPCKLPD256,
21909 IX86_BUILTIN_UNPCKHPS256,
21910 IX86_BUILTIN_UNPCKLPS256,
21912 IX86_BUILTIN_SI256_SI,
21913 IX86_BUILTIN_PS256_PS,
21914 IX86_BUILTIN_PD256_PD,
21915 IX86_BUILTIN_SI_SI256,
21916 IX86_BUILTIN_PS_PS256,
21917 IX86_BUILTIN_PD_PD256,
21919 IX86_BUILTIN_VTESTZPD,
21920 IX86_BUILTIN_VTESTCPD,
21921 IX86_BUILTIN_VTESTNZCPD,
21922 IX86_BUILTIN_VTESTZPS,
21923 IX86_BUILTIN_VTESTCPS,
21924 IX86_BUILTIN_VTESTNZCPS,
21925 IX86_BUILTIN_VTESTZPD256,
21926 IX86_BUILTIN_VTESTCPD256,
21927 IX86_BUILTIN_VTESTNZCPD256,
21928 IX86_BUILTIN_VTESTZPS256,
21929 IX86_BUILTIN_VTESTCPS256,
21930 IX86_BUILTIN_VTESTNZCPS256,
21931 IX86_BUILTIN_PTESTZ256,
21932 IX86_BUILTIN_PTESTC256,
21933 IX86_BUILTIN_PTESTNZC256,
21935 IX86_BUILTIN_MOVMSKPD256,
21936 IX86_BUILTIN_MOVMSKPS256,
21938 /* TFmode support builtins. */
21940 IX86_BUILTIN_HUGE_VALQ,
21941 IX86_BUILTIN_FABSQ,
21942 IX86_BUILTIN_COPYSIGNQ,
21944 /* Vectorizer support builtins. */
21945 IX86_BUILTIN_CPYSGNPS,
21946 IX86_BUILTIN_CPYSGNPD,
21948 IX86_BUILTIN_CVTUDQ2PS,
21950 IX86_BUILTIN_VEC_PERM_V2DF,
21951 IX86_BUILTIN_VEC_PERM_V4SF,
21952 IX86_BUILTIN_VEC_PERM_V2DI,
21953 IX86_BUILTIN_VEC_PERM_V4SI,
21954 IX86_BUILTIN_VEC_PERM_V8HI,
21955 IX86_BUILTIN_VEC_PERM_V16QI,
21956 IX86_BUILTIN_VEC_PERM_V2DI_U,
21957 IX86_BUILTIN_VEC_PERM_V4SI_U,
21958 IX86_BUILTIN_VEC_PERM_V8HI_U,
21959 IX86_BUILTIN_VEC_PERM_V16QI_U,
21960 IX86_BUILTIN_VEC_PERM_V4DF,
21961 IX86_BUILTIN_VEC_PERM_V8SF,
21963 /* FMA4 and XOP instructions. */
21964 IX86_BUILTIN_VFMADDSS,
21965 IX86_BUILTIN_VFMADDSD,
21966 IX86_BUILTIN_VFMADDPS,
21967 IX86_BUILTIN_VFMADDPD,
21968 IX86_BUILTIN_VFMSUBSS,
21969 IX86_BUILTIN_VFMSUBSD,
21970 IX86_BUILTIN_VFMSUBPS,
21971 IX86_BUILTIN_VFMSUBPD,
21972 IX86_BUILTIN_VFMADDSUBPS,
21973 IX86_BUILTIN_VFMADDSUBPD,
21974 IX86_BUILTIN_VFMSUBADDPS,
21975 IX86_BUILTIN_VFMSUBADDPD,
21976 IX86_BUILTIN_VFNMADDSS,
21977 IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;			/* function name */
  enum ix86_builtin_func_type tcode;	/* type to use in the declaration */
  int isa;				/* isa_flags this builtin is defined for */
  bool const_p;				/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
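
/* Illustration (added commentary, not part of the original source): with a
   front end that supplies a distinct ext-scope builtin hook, a call such as

     def_builtin (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_pcmpistri128",
		  tcode, IX86_BUILTIN_PCMPISTRI128);

   made while SSE4.2 is disabled only records NAME, TCODE and the ISA mask in
   ix86_builtins_isa and returns NULL_TREE; the decl itself is built later by
   ix86_add_new_builtins once the ISA is enabled.  (The builtin name and code
   here are taken from bdesc_pcmpistr below; the exact tcode value is
   elided.)  */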
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
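
/* Illustration (added commentary, not part of the original source): pure
   arithmetic builtins such as __builtin_ia32_addps from bdesc_args below are
   typically registered through this wrapper, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);

   so that the middle end may CSE and hoist calls to them like any other
   side-effect-free expression.  */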
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
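
/* Illustration (added commentary, not part of the original source): the
   deferred path above is what allows a translation unit compiled without
   -msse4.2 to contain, say,

     __attribute__((target ("sse4.2")))
     int f (void) { ... use an SSE4.2 builtin ... }

   Switching the ISA flags for f is expected to trigger ix86_add_new_builtins
   with the SSE4.2 bit set, which materializes every decl that def_builtin
   recorded as set_and_not_built_p.  */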
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1
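
/* For example (added commentary, not part of the original source): if the
   hardware only provides an ordered "less than" compare, a "greater than"
   builtin can be described with comparison == LT and this bit set; the
   expander then swaps the two operands, since a > b is exactly b < a.  */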
struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
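
/* Reading the tables below (added commentary, not part of the original
   source): each initializer fills the six fields in order, e.g.

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   is enabled by the SSE ISA mask, expands through the sse_comi insn pattern,
   is visible to users as __builtin_ia32_comieq, is identified internally as
   IX86_BUILTIN_COMIEQSS, and implements the UNEQ comparison.  In
   bdesc_special_args and bdesc_args the last field instead carries a
   function-type code cast from ix86_builtin_func_type.  */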
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
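
/* Note (added commentary, not part of the original source): the _a/_c/_o/
   _s/_z variants in the two tables above all expand through the same
   pcmpestr/pcmpistr insn; the CC mode stored in the last field appears to
   select which condition-code flag (the "above" test, CF, OF, SF or ZF)
   the builtin returns.  */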
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  /* LWP */
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
22799 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
22802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
22803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22808 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22813 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22814 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22815 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22816 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22817 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22818 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22821 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22822 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22823 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22824 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22825 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22826 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22828 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22829 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22830 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22831 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
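
  /* A note on the two entries above, offered as a hedged sketch: the
     FABSQ and COPYSIGNQ rows have a null name field, so the generic
     registration loop does not export them as __builtin_ia32_*
     builtins; they are registered elsewhere under names such as
     __builtin_fabsq and __builtin_copysignq and simply reuse the
     TFmode abs/copysign patterns named here.  */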

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
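
  /* Illustration of the horizontal SSE3 entries above (assumed
     semantics, matching the instruction-set manuals): they combine
     adjacent elements, e.g.

       __builtin_ia32_haddps ({a0,a1,a2,a3}, {b0,b1,b2,b3})
         == {a0+a1, a2+a3, b0+b1, b2+b3}

     while addsubps subtracts in the even lanes and adds in the odd
     lanes: {a0-b0, a1+b1, a2-b2, a3+b3}.  */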

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
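
  /* Editor's note (hedged): the palignr entries use _INT_CONVERT
     because the builtin takes its shift amount in bits while the
     user-level intrinsic is specified in bytes; the wrapper header is
     expected to scale it, roughly as in

       #define _mm_alignr_epi8(X, Y, N) \
         ((__m128i) __builtin_ia32_palignr128 ((__v2di)(X), \
                                               (__v2di)(Y), (int)(N) * 8))
  */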

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
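
  /* The three PTEST entries above share one insn pattern and differ
     only in the comparison code used to read back the flags it sets
     (stated here as a hedged summary): EQ tests ZF for ptestz, i.e.
     (A & B) all zeroes; LTU tests CF for ptestc, i.e. (~A & B) all
     zeroes; GTU tests "neither flag set" for ptestnzc.  The AVX
     vtest and ptest256 entries further down use the same scheme.  */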

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
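
  /* Usage sketch for the CRC32 entries above (illustrative only):

       unsigned int crc = ~0U;
       crc = __builtin_ia32_crc32qi (crc, byte);

     accumulates a CRC-32C (Castagnoli polynomial) checksum one byte
     at a time; the hi/si/di variants consume 16, 32 and 64 bits per
     step, and the DImode form is only available in 64-bit mode, per
     its OPTION_MASK_ISA_64BIT requirement.  */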

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
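
  /* Hedged note: like FABSQ earlier, the AES and PCLMUL entries carry
     a null name field, so the rows here only supply the insn code and
     function type; the public names in the __builtin_ia32_aesenc128
     style are attached where those builtins are registered, which also
     lets their availability be gated on the AES/PCLMUL ISA flags
     rather than on the OPTION_MASK_ISA_SSE2 listed here.  */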

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
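
  /* As with the SSE4.1 PTEST rows earlier, each vtest/ptest256 insn
     above is entered three times, with EQ, LTU and GTU selecting the
     Z, C and "neither flag" readouts respectively (hedged summary;
     for the vtest forms the flags are computed from the sign bits of
     the packed elements rather than from whole bit vectors).  */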

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
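
/* Hedged decoding of the MULTI_ARG_* shorthands above: the first
   number is the operand count and the mode letters give the element
   type, with a trailing 2 marking the 256-bit form; e.g.
   MULTI_ARG_3_SF2 is V8SF_FTYPE_V8SF_V8SF_V8SF, a three-operand
   function over 256-bit float vectors.  The _IMM, _CMP and _TF
   variants respectively add an immediate count, a comparison code,
   and what appears to be the true/false selector used by the
   vpcomtrue/vpcomfalse comparison forms.  */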

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
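
  /* Semantics sketch for VPCMOV (per the XOP specification, stated
     here as a hedged aid): it is a pure bitwise select,

       vpcmov (a, b, c) == (a & c) | (b & ~c)

     which is why a single pattern per register width serves every
     element type and only the builtin's function type changes from
     row to row above.  */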

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
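
  /* Hedged summary of the two families above: the vprot entries are
     per-element rotates, with the _IMM rows covering the
     immediate-count encoding, while the vpsha and vpshl entries are
     per-element variable shifts, arithmetic and logical respectively,
     whose per-lane counts may be negative to shift in the opposite
     direction.  */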

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23307 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23315 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23324 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23333 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23338 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23340 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23341 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23342 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23343 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
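/* Illustrative note (not part of the descriptor table): under -mxop each
   vpcom* entry above pairs one builtin with a compare rtx code, so e.g.
   __builtin_ia32_vpcomltb (a, b) expands through CODE_FOR_xop_maskcmpv16qi3
   with LT, yielding a byte mask that is all-ones where the signed
   comparison holds.  */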
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target-specific options that differ from the
   command-line options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
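/* Note on the two loops above: def_builtin_const additionally marks the
   resulting decl TREE_READONLY, so pure value computations go through
   bdesc_args, while builtins with side effects (loads, stores, fences)
   stay in bdesc_special_args and plain def_builtin.  */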
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
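/* For reference, these special builtins are what the <emmintrin.h>
   intrinsics lower to, e.g. _mm_clflush (p) calls
   __builtin_ia32_clflush (p) and _mm_mfence () calls
   __builtin_ia32_mfence ().  */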
  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
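/* These vec_set/vec_ext builtins back the element-access intrinsics,
   e.g. _mm_insert_epi8 in <smmintrin.h> is implemented with
   __builtin_ia32_vec_set_v16qi and _mm_extract_epi16 with
   __builtin_ia32_vec_ext_v8hi.  */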
  /* Add FMA4 multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
	continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to a normal call if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
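/* For example, __builtin_ia32_paddw128 (a, b) reaches this helper with
   icode == CODE_FOR_addv8hi3; the target and both operands are coerced
   to that pattern's operand predicates before the insn is emitted.  */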
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
	{
	  if (!CONST_INT_P (op))
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
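/* As an example of the flow above, __builtin_ia32_vfmaddps (a, b, c) is
   classified MULTI_ARG_3_SF, so nargs == 3 and the FMA4 pattern is
   emitted directly with the three register-coerced operands.  */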
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
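/* The SWAP path is how a builtin that names one direction of a
   comparison can be emitted with a pattern that only implements the
   other direction: the operands are exchanged and the descriptor's
   comparison code is applied to the swapped pair.  */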
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
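/* The SImode pseudo is zeroed first so that setting only the low byte
   via STRICT_LOW_PART leaves a clean 0/1 int result; SUBREG_REG then
   hands back the full SImode value.  */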
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
24083 tree arg0 = CALL_EXPR_ARG (exp, 0);
24084 tree arg1 = CALL_EXPR_ARG (exp, 1);
24085 tree arg2 = CALL_EXPR_ARG (exp, 2);
24086 tree arg3 = CALL_EXPR_ARG (exp, 3);
24087 tree arg4 = CALL_EXPR_ARG (exp, 4);
24088 rtx scratch0, scratch1;
24089 rtx op0 = expand_normal (arg0);
24090 rtx op1 = expand_normal (arg1);
24091 rtx op2 = expand_normal (arg2);
24092 rtx op3 = expand_normal (arg3);
24093 rtx op4 = expand_normal (arg4);
24094 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24096 tmode0 = insn_data[d->icode].operand[0].mode;
24097 tmode1 = insn_data[d->icode].operand[1].mode;
24098 modev2 = insn_data[d->icode].operand[2].mode;
24099 modei3 = insn_data[d->icode].operand[3].mode;
24100 modev4 = insn_data[d->icode].operand[4].mode;
24101 modei5 = insn_data[d->icode].operand[5].mode;
24102 modeimm = insn_data[d->icode].operand[6].mode;
24104 if (VECTOR_MODE_P (modev2))
24105 op0 = safe_vector_operand (op0, modev2);
24106 if (VECTOR_MODE_P (modev4))
24107 op2 = safe_vector_operand (op2, modev4);
24109 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24110 op0 = copy_to_mode_reg (modev2, op0);
24111 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24112 op1 = copy_to_mode_reg (modei3, op1);
24113 if ((optimize && !register_operand (op2, modev4))
24114 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24115 op2 = copy_to_mode_reg (modev4, op2);
24116 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24117 op3 = copy_to_mode_reg (modei5, op3);
  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
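/* A nonzero d->flag here marks the flag-testing variants such as
   __builtin_ia32_pcmpestria128; the flag value encodes the machine
   mode used to read the comparison out of FLAGS_REG above.  */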
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
24188 tree arg0 = CALL_EXPR_ARG (exp, 0);
24189 tree arg1 = CALL_EXPR_ARG (exp, 1);
24190 tree arg2 = CALL_EXPR_ARG (exp, 2);
24191 rtx scratch0, scratch1;
24192 rtx op0 = expand_normal (arg0);
24193 rtx op1 = expand_normal (arg1);
24194 rtx op2 = expand_normal (arg2);
24195 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24197 tmode0 = insn_data[d->icode].operand[0].mode;
24198 tmode1 = insn_data[d->icode].operand[1].mode;
24199 modev2 = insn_data[d->icode].operand[2].mode;
24200 modev3 = insn_data[d->icode].operand[3].mode;
24201 modeimm = insn_data[d->icode].operand[4].mode;
24203 if (VECTOR_MODE_P (modev2))
24204 op0 = safe_vector_operand (op0, modev2);
24205 if (VECTOR_MODE_P (modev3))
24206 op1 = safe_vector_operand (op1, modev3);
24208 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24209 op0 = copy_to_mode_reg (modev2, op0);
24210 if ((optimize && !register_operand (op1, modev3))
24211 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24212 op1 = copy_to_mode_reg (modev3, op1);
  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
24301 case INT_FTYPE_V8SF_V8SF_PTEST:
24302 case INT_FTYPE_V4DI_V4DI_PTEST:
24303 case INT_FTYPE_V4DF_V4DF_PTEST:
24304 case INT_FTYPE_V4SF_V4SF_PTEST:
24305 case INT_FTYPE_V2DI_V2DI_PTEST:
24306 case INT_FTYPE_V2DF_V2DF_PTEST:
24307 return ix86_expand_sse_ptest (d, exp, target);
24308 case FLOAT128_FTYPE_FLOAT128:
24309 case FLOAT_FTYPE_FLOAT:
24310 case INT_FTYPE_INT:
24311 case UINT64_FTYPE_INT:
24312 case UINT16_FTYPE_UINT16:
24313 case INT64_FTYPE_INT64:
24314 case INT64_FTYPE_V4SF:
24315 case INT64_FTYPE_V2DF:
24316 case INT_FTYPE_V16QI:
24317 case INT_FTYPE_V8QI:
24318 case INT_FTYPE_V8SF:
24319 case INT_FTYPE_V4DF:
24320 case INT_FTYPE_V4SF:
24321 case INT_FTYPE_V2DF:
24322 case V16QI_FTYPE_V16QI:
24323 case V8SI_FTYPE_V8SF:
24324 case V8SI_FTYPE_V4SI:
24325 case V8HI_FTYPE_V8HI:
24326 case V8HI_FTYPE_V16QI:
24327 case V8QI_FTYPE_V8QI:
24328 case V8SF_FTYPE_V8SF:
24329 case V8SF_FTYPE_V8SI:
24330 case V8SF_FTYPE_V4SF:
24331 case V8SF_FTYPE_V8HI:
24332 case V4SI_FTYPE_V4SI:
24333 case V4SI_FTYPE_V16QI:
24334 case V4SI_FTYPE_V4SF:
24335 case V4SI_FTYPE_V8SI:
24336 case V4SI_FTYPE_V8HI:
24337 case V4SI_FTYPE_V4DF:
24338 case V4SI_FTYPE_V2DF:
24339 case V4HI_FTYPE_V4HI:
24340 case V4DF_FTYPE_V4DF:
24341 case V4DF_FTYPE_V4SI:
24342 case V4DF_FTYPE_V4SF:
24343 case V4DF_FTYPE_V2DF:
24344 case V4SF_FTYPE_V4SF:
24345 case V4SF_FTYPE_V4SI:
24346 case V4SF_FTYPE_V8SF:
24347 case V4SF_FTYPE_V4DF:
24348 case V4SF_FTYPE_V8HI:
24349 case V4SF_FTYPE_V2DF:
24350 case V2DI_FTYPE_V2DI:
24351 case V2DI_FTYPE_V16QI:
24352 case V2DI_FTYPE_V8HI:
24353 case V2DI_FTYPE_V4SI:
24354 case V2DF_FTYPE_V2DF:
24355 case V2DF_FTYPE_V4SI:
24356 case V2DF_FTYPE_V4DF:
24357 case V2DF_FTYPE_V4SF:
24358 case V2DF_FTYPE_V2SI:
24359 case V2SI_FTYPE_V2SI:
24360 case V2SI_FTYPE_V4SF:
24361 case V2SI_FTYPE_V2SF:
24362 case V2SI_FTYPE_V2DF:
24363 case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
24368 case V2DF_FTYPE_V2DF_VEC_MERGE:
24369 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24370 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24371 case V16QI_FTYPE_V16QI_V16QI:
24372 case V16QI_FTYPE_V8HI_V8HI:
24373 case V8QI_FTYPE_V8QI_V8QI:
24374 case V8QI_FTYPE_V4HI_V4HI:
24375 case V8HI_FTYPE_V8HI_V8HI:
24376 case V8HI_FTYPE_V16QI_V16QI:
24377 case V8HI_FTYPE_V4SI_V4SI:
24378 case V8SF_FTYPE_V8SF_V8SF:
24379 case V8SF_FTYPE_V8SF_V8SI:
24380 case V4SI_FTYPE_V4SI_V4SI:
24381 case V4SI_FTYPE_V8HI_V8HI:
24382 case V4SI_FTYPE_V4SF_V4SF:
24383 case V4SI_FTYPE_V2DF_V2DF:
24384 case V4HI_FTYPE_V4HI_V4HI:
24385 case V4HI_FTYPE_V8QI_V8QI:
24386 case V4HI_FTYPE_V2SI_V2SI:
24387 case V4DF_FTYPE_V4DF_V4DF:
24388 case V4DF_FTYPE_V4DF_V4DI:
24389 case V4SF_FTYPE_V4SF_V4SF:
24390 case V4SF_FTYPE_V4SF_V4SI:
24391 case V4SF_FTYPE_V4SF_V2SI:
24392 case V4SF_FTYPE_V4SF_V2DF:
24393 case V4SF_FTYPE_V4SF_DI:
24394 case V4SF_FTYPE_V4SF_SI:
24395 case V2DI_FTYPE_V2DI_V2DI:
24396 case V2DI_FTYPE_V16QI_V16QI:
24397 case V2DI_FTYPE_V4SI_V4SI:
24398 case V2DI_FTYPE_V2DI_V16QI:
24399 case V2DI_FTYPE_V2DF_V2DF:
24400 case V2SI_FTYPE_V2SI_V2SI:
24401 case V2SI_FTYPE_V4HI_V4HI:
24402 case V2SI_FTYPE_V2SF_V2SF:
24403 case V2DF_FTYPE_V2DF_V2DF:
24404 case V2DF_FTYPE_V2DF_V4SF:
24405 case V2DF_FTYPE_V2DF_V2DI:
24406 case V2DF_FTYPE_V2DF_DI:
24407 case V2DF_FTYPE_V2DF_SI:
24408 case V2SF_FTYPE_V2SF_V2SF:
24409 case V1DI_FTYPE_V1DI_V1DI:
24410 case V1DI_FTYPE_V8QI_V8QI:
24411 case V1DI_FTYPE_V2SI_V2SI:
24412 if (comparison == UNKNOWN)
	return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
24417 case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
24423 case V8HI_FTYPE_V8HI_SI_COUNT:
24424 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24425 case V4SI_FTYPE_V4SI_SI_COUNT:
24426 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24427 case V4HI_FTYPE_V4HI_SI_COUNT:
24428 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24429 case V2DI_FTYPE_V2DI_SI_COUNT:
24430 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24431 case V2SI_FTYPE_V2SI_SI_COUNT:
24432 case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
24438 case UINT_FTYPE_UINT_UINT:
24439 case UINT_FTYPE_UINT_USHORT:
24440 case UINT_FTYPE_UINT_UCHAR:
24441 case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
24451 case V8HI_FTYPE_V8SF_INT:
24452 case V8HI_FTYPE_V4SF_INT:
24453 case V8SF_FTYPE_V8SF_INT:
24454 case V4SI_FTYPE_V4SI_INT:
24455 case V4SI_FTYPE_V8SI_INT:
24456 case V4HI_FTYPE_V4HI_INT:
24457 case V4DF_FTYPE_V4DF_INT:
24458 case V4SF_FTYPE_V4SF_INT:
24459 case V4SF_FTYPE_V8SF_INT:
24460 case V2DI_FTYPE_V2DI_INT:
24461 case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
24467 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24468 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24469 case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
24474 case V8HI_FTYPE_V8HI_V8HI_INT:
24475 case V8SI_FTYPE_V8SI_V8SI_INT:
24476 case V8SI_FTYPE_V8SI_V4SI_INT:
24477 case V8SF_FTYPE_V8SF_V8SF_INT:
24478 case V8SF_FTYPE_V8SF_V4SF_INT:
24479 case V4SI_FTYPE_V4SI_V4SI_INT:
24480 case V4DF_FTYPE_V4DF_V4DF_INT:
24481 case V4DF_FTYPE_V4DF_V2DF_INT:
24482 case V4SF_FTYPE_V4SF_V4SF_INT:
24483 case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));
  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }
  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
	{
	  /* SIMD shift insns take either an 8-bit immediate or
	     register as count.  But builtin functions take int as
	     count.  If count doesn't match, we put it in register.  */
	  if (!match)
	    {
	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
	      if (!insn_p->operand[i + 1].predicate (op, mode))
		op = copy_to_reg (op);
	    }
	}
      else if ((nargs - i) <= nargs_constant)
	{
	  if (!match)
	    switch (icode)
	      {
	      case CODE_FOR_sse4_1_roundpd:
	      case CODE_FOR_sse4_1_roundps:
	      case CODE_FOR_sse4_1_roundsd:
	      case CODE_FOR_sse4_1_roundss:
	      case CODE_FOR_sse4_1_blendps:
	      case CODE_FOR_avx_blendpd256:
	      case CODE_FOR_avx_vpermilv4df:
	      case CODE_FOR_avx_roundpd256:
	      case CODE_FOR_avx_roundps256:
		error ("the last argument must be a 4-bit immediate");
		return const0_rtx;

	      case CODE_FOR_sse4_1_blendpd:
	      case CODE_FOR_avx_vpermilv2df:
	      case CODE_FOR_xop_vpermil2v2df3:
	      case CODE_FOR_xop_vpermil2v4sf3:
	      case CODE_FOR_xop_vpermil2v4df3:
	      case CODE_FOR_xop_vpermil2v8sf3:
		error ("the last argument must be a 2-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_vextractf128v4df:
	      case CODE_FOR_avx_vextractf128v8sf:
	      case CODE_FOR_avx_vextractf128v8si:
	      case CODE_FOR_avx_vinsertf128v4df:
	      case CODE_FOR_avx_vinsertf128v8sf:
	      case CODE_FOR_avx_vinsertf128v8si:
		error ("the last argument must be a 1-bit immediate");
		return const0_rtx;

	      case CODE_FOR_avx_cmpsdv2df3:
	      case CODE_FOR_avx_cmpssv4sf3:
	      case CODE_FOR_avx_cmppdv2df3:
	      case CODE_FOR_avx_cmppsv4sf3:
	      case CODE_FOR_avx_cmppdv4df3:
	      case CODE_FOR_avx_cmppsv8sf3:
		error ("the last argument must be a 5-bit immediate");
		return const0_rtx;

	      default:
		switch (nargs_constant)
		  {
		  case 2:
		    if ((nargs - i) == nargs_constant)
		      {
			error ("the next to last argument must be an 8-bit immediate");
			break;
		      }
		  case 1:
		    error ("the last argument must be an 8-bit immediate");
		    break;
		  default:
		    gcc_unreachable ();
		  }
		return const0_rtx;
	      }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to
	     be generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
	    {
	      if (optimize || !match || num_memory > 1)
		op = copy_to_mode_reg (mode, op);
	    }
	  else
	    {
	      op = copy_to_reg (op);
	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
			     args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
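/* Example of the _COUNT handling above: __builtin_ia32_psllwi128 takes
   an int count, so when the value is not an immediate accepted by the
   shift pattern it is copied into a register first.  */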
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
				  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
    case UINT16_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
24715 case V2DI_FTYPE_PV2DI:
24716 case V32QI_FTYPE_PCCHAR:
24717 case V16QI_FTYPE_PCCHAR:
24718 case V8SF_FTYPE_PCV4SF:
24719 case V8SF_FTYPE_PCFLOAT:
24720 case V4SF_FTYPE_PCFLOAT:
24721 case V4DF_FTYPE_PCV2DF:
24722 case V4DF_FTYPE_PCDOUBLE:
24723 case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
24730 case VOID_FTYPE_PV4DI_V4DI:
24731 case VOID_FTYPE_PV2DI_V2DI:
24732 case VOID_FTYPE_PCHAR_V32QI:
24733 case VOID_FTYPE_PCHAR_V16QI:
24734 case VOID_FTYPE_PFLOAT_V8SF:
24735 case VOID_FTYPE_PFLOAT_V4SF:
24736 case VOID_FTYPE_PDOUBLE_V4DF:
24737 case VOID_FTYPE_PDOUBLE_V2DF:
24738 case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SF:
24752 case V4DF_FTYPE_PCV4DF_V4DF:
24753 case V4SF_FTYPE_PCV4SF_V4SF:
    case V2DF_FTYPE_PCV2DF_V2DF:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SF_V8SF:
24760 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24761 case VOID_FTYPE_PV4SF_V4SF_V4SF:
    case VOID_FTYPE_PV2DF_V2DF_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));
  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
	target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
	target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
	  || target == 0
	  || GET_MODE (target) != tmode
	  || !insn_p->operand[0].predicate (target, tmode))
	target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
	{
	  if (!match)
	    {
	      if (icode == CODE_FOR_lwp_lwpvalsi3
		  || icode == CODE_FOR_lwp_lwpinssi3
		  || icode == CODE_FOR_lwp_lwpvaldi3
		  || icode == CODE_FOR_lwp_lwpinsdi3)
		error ("the last argument must be a 32-bit immediate");
	      else
		error ("the last argument must be an 8-bit immediate");
	      return const0_rtx;
	    }
	}
      else
	{
	  if (i == memory)
	    {
	      /* This must be the memory operand.  */
	      op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	    }
	  else
	    {
	      /* This must be a register.  */
	      if (VECTOR_MODE_P (mode))
		op = safe_vector_operand (op, mode);

	      gcc_assert (GET_MODE (op) == mode
			  || GET_MODE (op) == VOIDmode);
	      op = copy_to_mode_reg (mode, op);
	    }
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */

static rtx
ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
  enum machine_mode tmode = TYPE_MODE (type);
  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
  int i, n_elt = GET_MODE_NUNITS (tmode);
  rtvec v = rtvec_alloc (n_elt);

  gcc_assert (VECTOR_MODE_P (tmode));
  gcc_assert (call_expr_nargs (exp) == n_elt);

  for (i = 0; i < n_elt; ++i)
    {
      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
    }

  if (!target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
   had a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_ext_builtin (tree exp, rtx target)
{
  enum machine_mode tmode, mode0;
  tree arg0, arg1;
  rtx op0;
  unsigned HOST_WIDE_INT elt;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);

  op0 = expand_normal (arg0);
  elt = get_element_number (TREE_TYPE (arg0), arg1);

  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  mode0 = TYPE_MODE (TREE_TYPE (arg0));
  gcc_assert (VECTOR_MODE_P (mode0));

  op0 = force_reg (mode0, op0);

  if (optimize || !target || !register_operand (target, tmode))
    target = gen_reg_rtx (tmode);

  ix86_expand_vector_extract (true, target, op0, elt);

  return target;
}
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
   a language-level syntax for referencing vector elements.  */

static rtx
ix86_expand_vec_set_builtin (tree exp)
{
  enum machine_mode tmode, mode1;
  tree arg0, arg1, arg2;
  unsigned HOST_WIDE_INT elt;
  rtx op0, op1, target;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  tmode = TYPE_MODE (TREE_TYPE (arg0));
  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
  gcc_assert (VECTOR_MODE_P (tmode));

  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
  elt = get_element_number (TREE_TYPE (arg0), arg2);

  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);

  op0 = force_reg (tmode, op0);
  op1 = force_reg (mode1, op1);

  /* OP0 is the source of these builtin functions and shouldn't be
     modified.  Create a copy, use it and return it as target.  */
  target = gen_reg_rtx (tmode);
  emit_move_insn (target, op0);
  ix86_expand_vector_set (true, target, op1, elt);

  return target;
}
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

static rtx
ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     enum machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  const struct builtin_description *d;
  size_t i;
  enum insn_code icode;
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg0, arg1, arg2;
  rtx op0, op1, op2, pat;
  enum machine_mode mode0, mode1, mode2;
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  /* Determine whether the builtin function is available under the current
     ISA.  Originally the builtin was not created if it wasn't applicable to
     the current ISA based on the command line switches.  With function
     specific options, we need to check in the context of the function making
     the call whether it is supported.  */
  if (ix86_builtins_isa[fcode].isa
      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
    {
      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
				       NULL, NULL, false);

      if (!opts)
	error ("%qE needs unknown isa option", fndecl);
      else
	{
	  gcc_assert (opts != NULL);
	  error ("%qE needs isa option %s", fndecl, opts);
	  free (opts);
	}
      return const0_rtx;
    }

  switch (fcode)
    {
    case IX86_BUILTIN_MASKMOVQ:
    case IX86_BUILTIN_MASKMOVDQU:
      icode = (fcode == IX86_BUILTIN_MASKMOVQ
	       ? CODE_FOR_mmx_maskmovq
	       : CODE_FOR_sse2_maskmovdqu);
      /* Note the arg order is different from the operand order.  */
      arg1 = CALL_EXPR_ARG (exp, 0);
      arg2 = CALL_EXPR_ARG (exp, 1);
      arg0 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      op0 = force_reg (Pmode, op0);
      op0 = gen_rtx_MEM (mode1, op0);

      if (!insn_data[icode].operand[0].predicate (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
      if (!insn_data[icode].operand[1].predicate (op1, mode1))
	op1 = copy_to_mode_reg (mode1, op1);
      if (!insn_data[icode].operand[2].predicate (op2, mode2))
	op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
	return 0;
      emit_insn (pat);
      return 0;
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_move_insn (target, op0);
      emit_insn (gen_sse_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
      emit_insn (gen_sse_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

    case IX86_BUILTIN_CLFLUSH:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      icode = CODE_FOR_sse2_clflush;
      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
	op0 = copy_to_mode_reg (Pmode, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MONITOR:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      arg2 = CALL_EXPR_ARG (exp, 2);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      op2 = expand_normal (arg2);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (Pmode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      if (!REG_P (op2))
	op2 = copy_to_mode_reg (SImode, op2);
      emit_insn (ix86_gen_monitor (op0, op1, op2));
      return 0;

    case IX86_BUILTIN_MWAIT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      arg1 = CALL_EXPR_ARG (exp, 1);
      op0 = expand_normal (arg0);
      op1 = expand_normal (arg1);
      if (!REG_P (op0))
	op0 = copy_to_mode_reg (SImode, op0);
      if (!REG_P (op1))
	op1 = copy_to_mode_reg (SImode, op1);
      emit_insn (gen_sse3_mwait (op0, op1));
      return 0;
25123 case IX86_BUILTIN_VEC_INIT_V2SI:
25124 case IX86_BUILTIN_VEC_INIT_V4HI:
25125 case IX86_BUILTIN_VEC_INIT_V8QI:
25126 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25128 case IX86_BUILTIN_VEC_EXT_V2DF:
25129 case IX86_BUILTIN_VEC_EXT_V2DI:
25130 case IX86_BUILTIN_VEC_EXT_V4SF:
25131 case IX86_BUILTIN_VEC_EXT_V4SI:
25132 case IX86_BUILTIN_VEC_EXT_V8HI:
25133 case IX86_BUILTIN_VEC_EXT_V2SI:
25134 case IX86_BUILTIN_VEC_EXT_V4HI:
25135 case IX86_BUILTIN_VEC_EXT_V16QI:
25136 return ix86_expand_vec_ext_builtin (exp, target);
25138 case IX86_BUILTIN_VEC_SET_V2DI:
25139 case IX86_BUILTIN_VEC_SET_V4SF:
25140 case IX86_BUILTIN_VEC_SET_V4SI:
25141 case IX86_BUILTIN_VEC_SET_V8HI:
25142 case IX86_BUILTIN_VEC_SET_V4HI:
25143 case IX86_BUILTIN_VEC_SET_V16QI:
25144 return ix86_expand_vec_set_builtin (exp);
25146 case IX86_BUILTIN_VEC_PERM_V2DF:
25147 case IX86_BUILTIN_VEC_PERM_V4SF:
25148 case IX86_BUILTIN_VEC_PERM_V2DI:
25149 case IX86_BUILTIN_VEC_PERM_V4SI:
25150 case IX86_BUILTIN_VEC_PERM_V8HI:
25151 case IX86_BUILTIN_VEC_PERM_V16QI:
25152 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25153 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25154 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25155 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25156 case IX86_BUILTIN_VEC_PERM_V4DF:
25157 case IX86_BUILTIN_VEC_PERM_V8SF:
25158 return ix86_expand_vec_perm_builtin (exp);
25160 case IX86_BUILTIN_INFQ:
25161 case IX86_BUILTIN_HUGE_VALQ:
25163 REAL_VALUE_TYPE inf;
25167 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25169 tmp = validize_mem (force_const_mem (mode, tmp));
25172 target = gen_reg_rtx (mode);
25174 emit_move_insn (target, tmp);
25178 case IX86_BUILTIN_LLWPCB:
25179 arg0 = CALL_EXPR_ARG (exp, 0);
25180 op0 = expand_normal (arg0);
25181 icode = CODE_FOR_lwp_llwpcb;
25182 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25183 op0 = copy_to_mode_reg (Pmode, op0);
25184 emit_insn (gen_lwp_llwpcb (op0));
25187 case IX86_BUILTIN_SLWPCB:
25188 icode = CODE_FOR_lwp_slwpcb;
25190 || !insn_data[icode].operand[0].predicate (target, Pmode))
25191 target = gen_reg_rtx (Pmode);
25192 emit_insn (gen_lwp_slwpcb (target));
25199 for (i = 0, d = bdesc_special_args;
25200 i < ARRAY_SIZE (bdesc_special_args);
25202 if (d->code == fcode)
25203 return ix86_expand_special_args_builtin (d, exp, target);
25205 for (i = 0, d = bdesc_args;
25206 i < ARRAY_SIZE (bdesc_args);
25208 if (d->code == fcode)
25211 case IX86_BUILTIN_FABSQ:
25212 case IX86_BUILTIN_COPYSIGNQ:
25214 /* Emit a normal call if SSE2 isn't available. */
25215 return expand_call (exp, target, ignore);
25217 return ix86_expand_args_builtin (d, exp, target);
25220 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25221 if (d->code == fcode)
25222 return ix86_expand_sse_comi (d, exp, target);
25224 for (i = 0, d = bdesc_pcmpestr;
25225 i < ARRAY_SIZE (bdesc_pcmpestr);
25227 if (d->code == fcode)
25228 return ix86_expand_sse_pcmpestr (d, exp, target);
25230 for (i = 0, d = bdesc_pcmpistr;
25231 i < ARRAY_SIZE (bdesc_pcmpistr);
25233 if (d->code == fcode)
25234 return ix86_expand_sse_pcmpistr (d, exp, target);
25236 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25237 if (d->code == fcode)
25238 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25239 (enum ix86_builtin_func_type)
25240 d->flag, d->comparison);
25242 gcc_unreachable ();
25245 /* Returns a function decl for a vectorized version of the builtin function
25246 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25247 if it is not available. */
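/* For example (restating the SQRT case below): when the vectorizer
   meets sqrt (double) in a loop vectorized with V2DF vectors, it calls
   this hook with FN == BUILT_IN_SQRT and TYPE_OUT == TYPE_IN == V2DF,
   and receives the decl recorded for IX86_BUILTIN_SQRTPD (sqrtpd).  */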
25250 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25253 enum machine_mode in_mode, out_mode;
25255 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25257 if (TREE_CODE (type_out) != VECTOR_TYPE
25258 || TREE_CODE (type_in) != VECTOR_TYPE
25259 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25262 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25263 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25264 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25265 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25269 case BUILT_IN_SQRT:
25270 if (out_mode == DFmode && out_n == 2
25271 && in_mode == DFmode && in_n == 2)
25272 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25275 case BUILT_IN_SQRTF:
25276 if (out_mode == SFmode && out_n == 4
25277 && in_mode == SFmode && in_n == 4)
25278 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25281 case BUILT_IN_LRINT:
25282 if (out_mode == SImode && out_n == 4
25283 && in_mode == DFmode && in_n == 2)
25284 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25287 case BUILT_IN_LRINTF:
25288 if (out_mode == SImode && out_n == 4
25289 && in_mode == SFmode && in_n == 4)
25290 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25293 case BUILT_IN_COPYSIGN:
25294 if (out_mode == DFmode && out_n == 2
25295 && in_mode == DFmode && in_n == 2)
25296 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25299 case BUILT_IN_COPYSIGNF:
25300 if (out_mode == SFmode && out_n == 4
25301 && in_mode == SFmode && in_n == 4)
25302 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25309 /* Dispatch to a handler for a vectorization library. */
25310 if (ix86_veclib_handler)
25311 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25317 /* Handler for an SVML-style interface to
25318 a library with vectorized intrinsics. */
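/* Naming sketch, derived from the mangling code below: BUILT_IN_SINF
   on V4SF becomes "vmlsSin4" and BUILT_IN_SIN on V2DF becomes
   "vmldSin2", while log and logf are special-cased as "vmldLn2" and
   "vmlsLn4".  */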
25321 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25324 tree fntype, new_fndecl, args;
25327 enum machine_mode el_mode, in_mode;
25330 /* The SVML library is suitable for unsafe math only. */
25331 if (!flag_unsafe_math_optimizations)
25334 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25335 n = TYPE_VECTOR_SUBPARTS (type_out);
25336 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25337 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25338 if (el_mode != in_mode
25346 case BUILT_IN_LOG10:
25348 case BUILT_IN_TANH:
25350 case BUILT_IN_ATAN:
25351 case BUILT_IN_ATAN2:
25352 case BUILT_IN_ATANH:
25353 case BUILT_IN_CBRT:
25354 case BUILT_IN_SINH:
25356 case BUILT_IN_ASINH:
25357 case BUILT_IN_ASIN:
25358 case BUILT_IN_COSH:
25360 case BUILT_IN_ACOSH:
25361 case BUILT_IN_ACOS:
25362 if (el_mode != DFmode || n != 2)
25366 case BUILT_IN_EXPF:
25367 case BUILT_IN_LOGF:
25368 case BUILT_IN_LOG10F:
25369 case BUILT_IN_POWF:
25370 case BUILT_IN_TANHF:
25371 case BUILT_IN_TANF:
25372 case BUILT_IN_ATANF:
25373 case BUILT_IN_ATAN2F:
25374 case BUILT_IN_ATANHF:
25375 case BUILT_IN_CBRTF:
25376 case BUILT_IN_SINHF:
25377 case BUILT_IN_SINF:
25378 case BUILT_IN_ASINHF:
25379 case BUILT_IN_ASINF:
25380 case BUILT_IN_COSHF:
25381 case BUILT_IN_COSF:
25382 case BUILT_IN_ACOSHF:
25383 case BUILT_IN_ACOSF:
25384 if (el_mode != SFmode || n != 4)
25392 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25394 if (fn == BUILT_IN_LOGF)
25395 strcpy (name, "vmlsLn4");
25396 else if (fn == BUILT_IN_LOG)
25397 strcpy (name, "vmldLn2");
25400 sprintf (name, "vmls%s", bname+10);
25401 name[strlen (name)-1] = '4';
25404 sprintf (name, "vmld%s2", bname+10);
25406 /* Convert to uppercase. */
25410 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25411 args = TREE_CHAIN (args))
25415 fntype = build_function_type_list (type_out, type_in, NULL);
25417 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25419 /* Build a function declaration for the vectorized function. */
25420 new_fndecl = build_decl (BUILTINS_LOCATION,
25421 FUNCTION_DECL, get_identifier (name), fntype);
25422 TREE_PUBLIC (new_fndecl) = 1;
25423 DECL_EXTERNAL (new_fndecl) = 1;
25424 DECL_IS_NOVOPS (new_fndecl) = 1;
25425 TREE_READONLY (new_fndecl) = 1;
25430 /* Handler for an ACML-style interface to
25431 a library with vectorized intrinsics. */
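/* Naming sketch (illustrative; part of the mangling code is elided
   here): BUILT_IN_SIN on V2DF maps to the ACML routine "__vrd2_sin"
   and BUILT_IN_SINF on V4SF to "__vrs4_sinf"; the ".." in the name
   template below is overwritten with the precision letter and the
   lane count.  */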
25434 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25436 char name[20] = "__vr.._";
25437 tree fntype, new_fndecl, args;
25440 enum machine_mode el_mode, in_mode;
25443 /* The ACML is 64-bit only and suitable for unsafe math only, as
25444 it does not correctly support parts of IEEE (such as denormals)
25445 with the required precision. */
25447 || !flag_unsafe_math_optimizations)
25450 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25451 n = TYPE_VECTOR_SUBPARTS (type_out);
25452 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25453 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25454 if (el_mode != in_mode
25464 case BUILT_IN_LOG2:
25465 case BUILT_IN_LOG10:
25468 if (el_mode != DFmode
25473 case BUILT_IN_SINF:
25474 case BUILT_IN_COSF:
25475 case BUILT_IN_EXPF:
25476 case BUILT_IN_POWF:
25477 case BUILT_IN_LOGF:
25478 case BUILT_IN_LOG2F:
25479 case BUILT_IN_LOG10F:
25482 if (el_mode != SFmode
25491 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25492 sprintf (name + 7, "%s", bname+10);
25495 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25496 args = TREE_CHAIN (args))
25500 fntype = build_function_type_list (type_out, type_in, NULL);
25502 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25504 /* Build a function declaration for the vectorized function. */
25505 new_fndecl = build_decl (BUILTINS_LOCATION,
25506 FUNCTION_DECL, get_identifier (name), fntype);
25507 TREE_PUBLIC (new_fndecl) = 1;
25508 DECL_EXTERNAL (new_fndecl) = 1;
25509 DECL_IS_NOVOPS (new_fndecl) = 1;
25510 TREE_READONLY (new_fndecl) = 1;
25516 /* Returns a decl of a function that implements conversion of an integer vector
25517 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25518 are the types involved when converting according to CODE.
25519 Return NULL_TREE if it is not available. */
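/* E.g. (restating the cases below): a FLOAT_EXPR from V4SI to V4SF
   yields the decl for IX86_BUILTIN_CVTDQ2PS (cvtdq2ps), or
   IX86_BUILTIN_CVTUDQ2PS for an unsigned source; unsupported mode
   pairs yield NULL_TREE.  */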
25522 ix86_vectorize_builtin_conversion (unsigned int code,
25523 tree dest_type, tree src_type)
25531 switch (TYPE_MODE (src_type))
25534 switch (TYPE_MODE (dest_type))
25537 return (TYPE_UNSIGNED (src_type)
25538 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25539 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25541 return (TYPE_UNSIGNED (src_type)
25543 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25549 switch (TYPE_MODE (dest_type))
25552 return (TYPE_UNSIGNED (src_type)
25554 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25563 case FIX_TRUNC_EXPR:
25564 switch (TYPE_MODE (dest_type))
25567 switch (TYPE_MODE (src_type))
25570 return (TYPE_UNSIGNED (dest_type)
25572 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25574 return (TYPE_UNSIGNED (dest_type)
25576 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25583 switch (TYPE_MODE (src_type))
25586 return (TYPE_UNSIGNED (dest_type)
25588 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25605 /* Returns the decl of a target-specific builtin that implements the
25606 reciprocal of the function, or NULL_TREE if not available. */
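/* E.g. (illustrative): with -ffast-math, BUILT_IN_SQRTF is reported
   as having IX86_BUILTIN_RSQRTF as its reciprocal, so a division by
   sqrtf (x) can be expanded via rsqrtss plus a Newton-Raphson
   refinement instead of a full square root and divide.  */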
25609 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25610 bool sqrt ATTRIBUTE_UNUSED)
25612 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25613 && flag_finite_math_only && !flag_trapping_math
25614 && flag_unsafe_math_optimizations))
25618 /* Machine dependent builtins. */
25621 /* Vectorized version of sqrt to rsqrt conversion. */
25622 case IX86_BUILTIN_SQRTPS_NR:
25623 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25629 /* Normal builtins. */
25632 /* Sqrt to rsqrt conversion. */
25633 case BUILT_IN_SQRTF:
25634 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25641 /* Helper for avx_vpermilps256_operand et al. This is also used by
25642 the expansion functions to turn the parallel back into a mask.
25643 The return value is 0 for no match and the imm8+1 for a match. */
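/* Worked example (illustrative): for a V4SF parallel (3 2 1 0), the
   128-bit case below computes mask = 3 | 2<<2 | 1<<4 | 0<<6 = 0x1b,
   exactly the vpermilps immediate, and returns 0x1b + 1.  */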
25646 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25648 unsigned i, nelt = GET_MODE_NUNITS (mode);
25650 unsigned char ipar[8];
25652 if (XVECLEN (par, 0) != (int) nelt)
25655 /* Validate that all of the elements are constants, and not totally
25656 out of range. Copy the data into an integral array to make the
25657 subsequent checks easier. */
25658 for (i = 0; i < nelt; ++i)
25660 rtx er = XVECEXP (par, 0, i);
25661 unsigned HOST_WIDE_INT ei;
25663 if (!CONST_INT_P (er))
25674 /* In the 256-bit DFmode case, we can only move elements within
25675 a 128-bit lane. */
25676 for (i = 0; i < 2; ++i)
25680 mask |= ipar[i] << i;
25682 for (i = 2; i < 4; ++i)
25686 mask |= (ipar[i] - 2) << i;
25691 /* In the 256-bit SFmode case, we have full freedom of movement
25692 within the low 128-bit lane, but the high 128-bit lane must
25693 mirror the exact same pattern. */
25694 for (i = 0; i < 4; ++i)
25695 if (ipar[i] + 4 != ipar[i + 4])
25702 /* In the 128-bit case, we've full freedom in the placement of
25703 the elements from the source operand. */
25704 for (i = 0; i < nelt; ++i)
25705 mask |= ipar[i] << (i * (nelt / 2));
25709 gcc_unreachable ();
25712 /* Make sure success has a non-zero value by adding one. */
25716 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25717 the expansion functions to turn the parallel back into a mask.
25718 The return value is 0 for no match and the imm8+1 for a match. */
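/* Worked example (illustrative; the divide-by-NELT2 step is elided
   here): for a V4DF parallel (0 1 4 5), each half selects one whole
   128-bit lane, and the reconstruction below yields mask = 0 | 2<<4
   = 0x20, the usual vperm2f128 immediate for "low lane of op0, low
   lane of op1", returned as 0x21.  */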
25721 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25723 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25725 unsigned char ipar[8];
25727 if (XVECLEN (par, 0) != (int) nelt)
25730 /* Validate that all of the elements are constants, and not totally
25731 out of range. Copy the data into an integral array to make the
25732 subsequent checks easier. */
25733 for (i = 0; i < nelt; ++i)
25735 rtx er = XVECEXP (par, 0, i);
25736 unsigned HOST_WIDE_INT ei;
25738 if (!CONST_INT_P (er))
25741 if (ei >= 2 * nelt)
25746 /* Validate that each half of the permute selects a run of consecutive elements, i.e. one whole lane. */
25747 for (i = 0; i < nelt2 - 1; ++i)
25748 if (ipar[i] + 1 != ipar[i + 1])
25750 for (i = nelt2; i < nelt - 1; ++i)
25751 if (ipar[i] + 1 != ipar[i + 1])
25754 /* Reconstruct the mask. */
25755 for (i = 0; i < 2; ++i)
25757 unsigned e = ipar[i * nelt2];
25761 mask |= e << (i * 4);
25764 /* Make sure success has a non-zero value by adding one. */
25769 /* Store OPERAND to the memory after reload is completed. This means
25770 that we can't easily use assign_stack_local. */
25772 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25776 gcc_assert (reload_completed);
25777 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25779 result = gen_rtx_MEM (mode,
25780 gen_rtx_PLUS (Pmode,
25782 GEN_INT (-RED_ZONE_SIZE)));
25783 emit_move_insn (result, operand);
25785 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25791 operand = gen_lowpart (DImode, operand);
25795 gen_rtx_SET (VOIDmode,
25796 gen_rtx_MEM (DImode,
25797 gen_rtx_PRE_DEC (DImode,
25798 stack_pointer_rtx)),
25802 gcc_unreachable ();
25804 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25813 split_di (&operand, 1, operands, operands + 1);
25815 gen_rtx_SET (VOIDmode,
25816 gen_rtx_MEM (SImode,
25817 gen_rtx_PRE_DEC (Pmode,
25818 stack_pointer_rtx)),
25821 gen_rtx_SET (VOIDmode,
25822 gen_rtx_MEM (SImode,
25823 gen_rtx_PRE_DEC (Pmode,
25824 stack_pointer_rtx)),
25829 /* Store HImodes as SImodes. */
25830 operand = gen_lowpart (SImode, operand);
25834 gen_rtx_SET (VOIDmode,
25835 gen_rtx_MEM (GET_MODE (operand),
25836 gen_rtx_PRE_DEC (SImode,
25837 stack_pointer_rtx)),
25841 gcc_unreachable ();
25843 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25848 /* Free operand from the memory. */
25850 ix86_free_from_memory (enum machine_mode mode)
25852 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25856 if (mode == DImode || TARGET_64BIT)
25860 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25861 to a pop or add instruction if registers are available. */
25862 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25863 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25868 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26869 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26870 same. */
25871 static const reg_class_t *
25872 i386_ira_cover_classes (void)
25874 static const reg_class_t sse_fpmath_classes[] = {
25875 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25877 static const reg_class_t no_sse_fpmath_classes[] = {
25878 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25881 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25884 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25885 QImode must go into class Q_REGS.
25886 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25887 movdf to do mem-to-mem moves through integer regs. */
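/* E.g. (illustrative): asked to load the SFmode constant 1.0 into
   FLOAT_REGS we keep FLOAT_REGS, because standard_80387_constant_p
   recognizes it as fld1; the same nonzero constant headed for
   SSE_REGS is forced into the constant pool instead.  */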
25889 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25891 enum machine_mode mode = GET_MODE (x);
25893 /* We're only allowed to return a subclass of CLASS. Many of the
25894 following checks fail for NO_REGS, so eliminate that early. */
25895 if (regclass == NO_REGS)
25898 /* All classes can load zeros. */
25899 if (x == CONST0_RTX (mode))
25902 /* Force constants into memory if we are loading a (nonzero) constant into
25903 an MMX or SSE register. This is because there are no MMX/SSE instructions
25904 to load from a constant. */
25906 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25909 /* Prefer SSE regs only, if we can use them for math. */
25910 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25911 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25913 /* Floating-point constants need more complex checks. */
25914 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25916 /* General regs can load everything. */
25917 if (reg_class_subset_p (regclass, GENERAL_REGS))
25920 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25921 zero above. We only want to wind up preferring 80387 registers if
25922 we plan on doing computation with them. */
25924 && standard_80387_constant_p (x))
25926 /* Limit class to non-sse. */
25927 if (regclass == FLOAT_SSE_REGS)
25929 if (regclass == FP_TOP_SSE_REGS)
25931 if (regclass == FP_SECOND_SSE_REGS)
25932 return FP_SECOND_REG;
25933 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26940 /* Generally when we see PLUS here, it's the function invariant
26941 (plus soft-fp const_int), which can only be computed into general
26942 regs. */
25943 if (GET_CODE (x) == PLUS)
25944 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25946 /* QImode constants are easy to load, but non-constant QImode data
25947 must go into Q_REGS. */
25948 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25950 if (reg_class_subset_p (regclass, Q_REGS))
25952 if (reg_class_subset_p (Q_REGS, regclass))
25960 /* Discourage putting floating-point values in SSE registers unless
25961 SSE math is being used, and likewise for the 387 registers. */
25963 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25965 enum machine_mode mode = GET_MODE (x);
25967 /* Restrict the output reload class to the register bank that we are doing
25968 math on. If we would rather not return a subset of CLASS, reject this
25969 alternative: if reload cannot do this, it will still use its choice. */
25970 mode = GET_MODE (x);
25971 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25972 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25974 if (X87_FLOAT_MODE_P (mode))
25976 if (regclass == FP_TOP_SSE_REGS)
25978 else if (regclass == FP_SECOND_SSE_REGS)
25979 return FP_SECOND_REG;
25981 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25988 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
25989 enum machine_mode mode,
25990 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25992 /* QImode spills from non-QI registers require an
25993 intermediate register on 32-bit targets. */
25994 if (!in_p && mode == QImode && !TARGET_64BIT
25995 && (rclass == GENERAL_REGS
25996 || rclass == LEGACY_REGS
25997 || rclass == INDEX_REGS))
26006 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26007 regno = true_regnum (x);
26009 /* Return Q_REGS if the operand is in memory. */
26017 /* If we are copying between general and FP registers, we need a memory
26018 location. The same is true for SSE and MMX registers.
26020 To keep register_move_cost fast, an inline variant is provided.
26022 The macro can't work reliably when one of the CLASSES is a class containing
26023 registers from multiple units (SSE, MMX, integer). We avoid this by never
26024 combining those units in a single alternative in the machine description.
26025 Ensure that this constraint holds to avoid surprises.
26027 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26028 enforce these sanity checks. */
26031 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26032 enum machine_mode mode, int strict)
26034 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26035 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26036 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26037 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26038 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26039 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26041 gcc_assert (!strict);
26045 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26048 /* ??? This is a lie. We do have moves between mmx/general, and for
26049 mmx/sse2. But by saying we need secondary memory we discourage the
26050 register allocator from using the mmx registers unless needed. */
26051 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26054 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26056 /* SSE1 doesn't have any direct moves from other classes. */
26060 /* If the target says that inter-unit moves are more expensive
26061 than moving through memory, then don't generate them. */
26062 if (!TARGET_INTER_UNIT_MOVES)
26065 /* Between SSE and general, we have moves no larger than word size. */
26066 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26074 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26075 enum machine_mode mode, int strict)
26077 return inline_secondary_memory_needed (class1, class2, mode, strict);
26080 /* Return true if the registers in CLASS cannot represent the change from
26081 modes FROM to TO. */
26084 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26085 enum reg_class regclass)
26090 /* x87 registers can't do subreg at all, as all values are reformatted
26091 to extended precision. */
26092 if (MAYBE_FLOAT_CLASS_P (regclass))
26095 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26097 /* Vector registers do not support QI or HImode loads. If we don't
26098 disallow a change to these modes, reload will assume it's ok to
26099 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26100 the vec_dupv4hi pattern. */
26101 if (GET_MODE_SIZE (from) < 4)
26104 /* Vector registers do not support subreg with nonzero offsets, which
26105 are otherwise valid for integer registers. Since we can't see
26106 whether we have a nonzero offset from here, prohibit all
26107 nonparadoxical subregs changing size. */
26108 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26115 /* Return the cost of moving data of mode M between a
26116 register and memory. A value of 2 is the default; this cost is
26117 relative to those in `REGISTER_MOVE_COST'.
26119 This function is used extensively by register_move_cost that is used to
26120 build tables at startup. Make it inline in this case.
26121 When IN is 2, return maximum of in and out move cost.
26123 If moving between registers and memory is more expensive than
26124 between two registers, you should define this macro to express the
26127 Model also the increased cost of moving QImode registers in
26128 non-Q_REGS classes. */
26131 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26135 if (FLOAT_CLASS_P (regclass))
26153 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26154 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26156 if (SSE_CLASS_P (regclass))
26159 switch (GET_MODE_SIZE (mode))
26174 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26175 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26177 if (MMX_CLASS_P (regclass))
26180 switch (GET_MODE_SIZE (mode))
26192 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26193 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26195 switch (GET_MODE_SIZE (mode))
26198 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26201 return ix86_cost->int_store[0];
26202 if (TARGET_PARTIAL_REG_DEPENDENCY
26203 && optimize_function_for_speed_p (cfun))
26204 cost = ix86_cost->movzbl_load;
26206 cost = ix86_cost->int_load[0];
26208 return MAX (cost, ix86_cost->int_store[0]);
26214 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26216 return ix86_cost->movzbl_load;
26218 return ix86_cost->int_store[0] + 4;
26223 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26224 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26226 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26227 if (mode == TFmode)
26230 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26232 cost = ix86_cost->int_load[2];
26234 cost = ix86_cost->int_store[2];
26235 return (cost * (((int) GET_MODE_SIZE (mode)
26236 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26241 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26244 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26248 /* Return the cost of moving data from a register in class CLASS1 to
26249 one in class CLASS2.
26251 It is not required that the cost always equal 2 when FROM is the same as TO;
26252 on some machines it is expensive to move between registers if they are not
26253 general registers. */
26256 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26257 reg_class_t class2_i)
26259 enum reg_class class1 = (enum reg_class) class1_i;
26260 enum reg_class class2 = (enum reg_class) class2_i;
26262 /* In case we require secondary memory, compute cost of the store followed
26263 by load. In order to avoid bad register allocation choices, we need
26264 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26266 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26270 cost += inline_memory_move_cost (mode, class1, 2);
26271 cost += inline_memory_move_cost (mode, class2, 2);
26273 /* In case of copying from a general purpose register we may emit multiple
26274 stores followed by a single load, causing a memory size mismatch stall.
26275 Count this as an arbitrarily high cost of 20. */
26276 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26279 /* In the case of FP/MMX moves, the registers actually overlap, and we
26280 have to switch modes in order to treat them differently. */
26281 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26282 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26288 /* Moves between SSE/MMX and integer unit are expensive. */
26289 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26290 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26292 /* ??? By keeping the returned value relatively high, we limit the
26293 number of moves between integer and MMX/SSE registers for all targets.
26294 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
26295 where integer modes in MMX/SSE registers are not tieable
26296 because of missing QImode and HImode moves to, from or between
26297 MMX/SSE registers. */
26298 return MAX (8, ix86_cost->mmxsse_to_integer);
26300 if (MAYBE_FLOAT_CLASS_P (class1))
26301 return ix86_cost->fp_move;
26302 if (MAYBE_SSE_CLASS_P (class1))
26303 return ix86_cost->sse_move;
26304 if (MAYBE_MMX_CLASS_P (class1))
26305 return ix86_cost->mmx_move;
26309 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26312 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26314 /* Flags, and only flags, can hold CCmode values. */
26315 if (CC_REGNO_P (regno))
26316 return GET_MODE_CLASS (mode) == MODE_CC;
26317 if (GET_MODE_CLASS (mode) == MODE_CC
26318 || GET_MODE_CLASS (mode) == MODE_RANDOM
26319 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26321 if (FP_REGNO_P (regno))
26322 return VALID_FP_MODE_P (mode);
26323 if (SSE_REGNO_P (regno))
26325 /* We implement the move patterns for all vector modes into and
26326 out of SSE registers, even when no operation instructions
26327 are available. OImode moves are available only when AVX is
26328 enabled. */
26329 return ((TARGET_AVX && mode == OImode)
26330 || VALID_AVX256_REG_MODE (mode)
26331 || VALID_SSE_REG_MODE (mode)
26332 || VALID_SSE2_REG_MODE (mode)
26333 || VALID_MMX_REG_MODE (mode)
26334 || VALID_MMX_REG_MODE_3DNOW (mode));
26336 if (MMX_REGNO_P (regno))
26338 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26339 so if the register is available at all, then we can move data of
26340 the given mode into or out of it. */
26341 return (VALID_MMX_REG_MODE (mode)
26342 || VALID_MMX_REG_MODE_3DNOW (mode));
26345 if (mode == QImode)
26347 /* Take care with QImode values - they can be in non-QI regs,
26348 but then they do cause partial register stalls. */
26349 if (regno <= BX_REG || TARGET_64BIT)
26351 if (!TARGET_PARTIAL_REG_STALL)
26353 return reload_in_progress || reload_completed;
26355 /* We handle both integers and floats in the general purpose registers. */
26356 else if (VALID_INT_MODE_P (mode))
26358 else if (VALID_FP_MODE_P (mode))
26360 else if (VALID_DFP_MODE_P (mode))
26362 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26363 on to use that value in smaller contexts, this can easily force a
26364 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26365 supporting DImode, allow it. */
26366 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26372 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26373 tieable integer mode. */
26376 ix86_tieable_integer_mode_p (enum machine_mode mode)
26385 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26388 return TARGET_64BIT;
26395 /* Return true if MODE1 is accessible in a register that can hold MODE2
26396 without copying. That is, all register classes that can hold MODE2
26397 can also hold MODE1. */
26400 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26402 if (mode1 == mode2)
26405 if (ix86_tieable_integer_mode_p (mode1)
26406 && ix86_tieable_integer_mode_p (mode2))
26409 /* MODE2 being XFmode implies fp stack or general regs, which means we
26410 can tie any smaller floating point modes to it. Note that we do not
26411 tie this with TFmode. */
26412 if (mode2 == XFmode)
26413 return mode1 == SFmode || mode1 == DFmode;
26415 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26416 that we can tie it with SFmode. */
26417 if (mode2 == DFmode)
26418 return mode1 == SFmode;
26420 /* If MODE2 is only appropriate for an SSE register, then tie with
26421 any other mode acceptable to SSE registers. */
26422 if (GET_MODE_SIZE (mode2) == 16
26423 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26424 return (GET_MODE_SIZE (mode1) == 16
26425 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26427 /* If MODE2 is appropriate for an MMX register, then tie
26428 with any other mode acceptable to MMX registers. */
26429 if (GET_MODE_SIZE (mode2) == 8
26430 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26431 return (GET_MODE_SIZE (mode1) == 8
26432 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26437 /* Compute a (partial) cost for rtx X. Return true if the complete
26438 cost has been computed, and false if subexpressions should be
26439 scanned. In either case, *TOTAL contains the cost result. */
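/* E.g. (illustrative): costing (mult:SI (reg) (const_int 5)), the
   MULT case below charges mult_init[MODE_INDEX (SImode)] plus
   2 * mult_bit, since 5 has two bits set, plus the costs of the two
   operands.  */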
26442 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26444 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26445 enum machine_mode mode = GET_MODE (x);
26446 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26454 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26456 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26458 else if (flag_pic && SYMBOLIC_CONST (x)
26460 || (GET_CODE (x) != LABEL_REF
26461 && (GET_CODE (x) != SYMBOL_REF
26462 || !SYMBOL_REF_LOCAL_P (x)))))
26469 if (mode == VOIDmode)
26472 switch (standard_80387_constant_p (x))
26477 default: /* Other constants */
26482 /* Start with (MEM (SYMBOL_REF)), since that's where
26483 it'll probably end up. Add a penalty for size. */
26484 *total = (COSTS_N_INSNS (1)
26485 + (flag_pic != 0 && !TARGET_64BIT)
26486 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26492 /* The zero extension is often completely free on x86_64, so make
26493 it as cheap as possible. */
26494 if (TARGET_64BIT && mode == DImode
26495 && GET_MODE (XEXP (x, 0)) == SImode)
26497 else if (TARGET_ZERO_EXTEND_WITH_AND)
26498 *total = cost->add;
26500 *total = cost->movzx;
26504 *total = cost->movsx;
26508 if (CONST_INT_P (XEXP (x, 1))
26509 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26511 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26514 *total = cost->add;
26517 if ((value == 2 || value == 3)
26518 && cost->lea <= cost->shift_const)
26520 *total = cost->lea;
26530 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26532 if (CONST_INT_P (XEXP (x, 1)))
26534 if (INTVAL (XEXP (x, 1)) > 32)
26535 *total = cost->shift_const + COSTS_N_INSNS (2);
26537 *total = cost->shift_const * 2;
26541 if (GET_CODE (XEXP (x, 1)) == AND)
26542 *total = cost->shift_var * 2;
26544 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26549 if (CONST_INT_P (XEXP (x, 1)))
26550 *total = cost->shift_const;
26552 *total = cost->shift_var;
26557 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26559 /* ??? SSE scalar cost should be used here. */
26560 *total = cost->fmul;
26563 else if (X87_FLOAT_MODE_P (mode))
26565 *total = cost->fmul;
26568 else if (FLOAT_MODE_P (mode))
26570 /* ??? SSE vector cost should be used here. */
26571 *total = cost->fmul;
26576 rtx op0 = XEXP (x, 0);
26577 rtx op1 = XEXP (x, 1);
26579 if (CONST_INT_P (XEXP (x, 1)))
26581 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26582 for (nbits = 0; value != 0; value &= value - 1)
26586 /* This is arbitrary. */
26589 /* Compute costs correctly for widening multiplication. */
26590 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26591 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26592 == GET_MODE_SIZE (mode))
26594 int is_mulwiden = 0;
26595 enum machine_mode inner_mode = GET_MODE (op0);
26597 if (GET_CODE (op0) == GET_CODE (op1))
26598 is_mulwiden = 1, op1 = XEXP (op1, 0);
26599 else if (CONST_INT_P (op1))
26601 if (GET_CODE (op0) == SIGN_EXTEND)
26602 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26605 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26609 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26612 *total = (cost->mult_init[MODE_INDEX (mode)]
26613 + nbits * cost->mult_bit
26614 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26623 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26624 /* ??? SSE cost should be used here. */
26625 *total = cost->fdiv;
26626 else if (X87_FLOAT_MODE_P (mode))
26627 *total = cost->fdiv;
26628 else if (FLOAT_MODE_P (mode))
26629 /* ??? SSE vector cost should be used here. */
26630 *total = cost->fdiv;
26632 *total = cost->divide[MODE_INDEX (mode)];
26636 if (GET_MODE_CLASS (mode) == MODE_INT
26637 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26639 if (GET_CODE (XEXP (x, 0)) == PLUS
26640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26641 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26642 && CONSTANT_P (XEXP (x, 1)))
26644 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26645 if (val == 2 || val == 4 || val == 8)
26647 *total = cost->lea;
26648 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26649 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26650 outer_code, speed);
26651 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26655 else if (GET_CODE (XEXP (x, 0)) == MULT
26656 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26658 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26659 if (val == 2 || val == 4 || val == 8)
26661 *total = cost->lea;
26662 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26663 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26667 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26669 *total = cost->lea;
26670 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26671 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26672 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26679 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26681 /* ??? SSE cost should be used here. */
26682 *total = cost->fadd;
26685 else if (X87_FLOAT_MODE_P (mode))
26687 *total = cost->fadd;
26690 else if (FLOAT_MODE_P (mode))
26692 /* ??? SSE vector cost should be used here. */
26693 *total = cost->fadd;
26701 if (!TARGET_64BIT && mode == DImode)
26703 *total = (cost->add * 2
26704 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26705 << (GET_MODE (XEXP (x, 0)) != DImode))
26706 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26707 << (GET_MODE (XEXP (x, 1)) != DImode)));
26713 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26715 /* ??? SSE cost should be used here. */
26716 *total = cost->fchs;
26719 else if (X87_FLOAT_MODE_P (mode))
26721 *total = cost->fchs;
26724 else if (FLOAT_MODE_P (mode))
26726 /* ??? SSE vector cost should be used here. */
26727 *total = cost->fchs;
26733 if (!TARGET_64BIT && mode == DImode)
26734 *total = cost->add * 2;
26736 *total = cost->add;
26740 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26741 && XEXP (XEXP (x, 0), 1) == const1_rtx
26742 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26743 && XEXP (x, 1) == const0_rtx)
26745 /* This kind of construct is implemented using test[bwl].
26746 Treat it as if we had an AND. */
26747 *total = (cost->add
26748 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26749 + rtx_cost (const1_rtx, outer_code, speed));
26755 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26760 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26761 /* ??? SSE cost should be used here. */
26762 *total = cost->fabs;
26763 else if (X87_FLOAT_MODE_P (mode))
26764 *total = cost->fabs;
26765 else if (FLOAT_MODE_P (mode))
26766 /* ??? SSE vector cost should be used here. */
26767 *total = cost->fabs;
26771 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26772 /* ??? SSE cost should be used here. */
26773 *total = cost->fsqrt;
26774 else if (X87_FLOAT_MODE_P (mode))
26775 *total = cost->fsqrt;
26776 else if (FLOAT_MODE_P (mode))
26777 /* ??? SSE vector cost should be used here. */
26778 *total = cost->fsqrt;
26782 if (XINT (x, 1) == UNSPEC_TP)
26789 case VEC_DUPLICATE:
26790 /* ??? Assume all of these vector manipulation patterns are
26791 recognizable. In which case they all pretty much have the
26792 same cost. */
26793 *total = COSTS_N_INSNS (1);
26803 static int current_machopic_label_num;
26805 /* Given a symbol name and its associated stub, write out the
26806 definition of the stub. */
26809 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26811 unsigned int length;
26812 char *binder_name, *symbol_name, lazy_ptr_name[32];
26813 int label = ++current_machopic_label_num;
26815 /* For 64-bit we shouldn't get here. */
26816 gcc_assert (!TARGET_64BIT);
26818 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26819 symb = targetm.strip_name_encoding (symb);
26821 length = strlen (stub);
26822 binder_name = XALLOCAVEC (char, length + 32);
26823 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26825 length = strlen (symb);
26826 symbol_name = XALLOCAVEC (char, length + 32);
26827 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26829 sprintf (lazy_ptr_name, "L%d$lz", label);
26832 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26834 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26836 fprintf (file, "%s:\n", stub);
26837 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26841 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26842 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26843 fprintf (file, "\tjmp\t*%%edx\n");
26846 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26848 fprintf (file, "%s:\n", binder_name);
26852 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26853 fputs ("\tpushl\t%eax\n", file);
26856 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26858 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26860 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26861 fprintf (file, "%s:\n", lazy_ptr_name);
26862 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26863 fprintf (file, ASM_LONG "%s\n", binder_name);
26865 #endif /* TARGET_MACHO */
26867 /* Order the registers for the register allocator. */
26870 x86_order_regs_for_local_alloc (void)
26875 /* First allocate the local general purpose registers. */
26876 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26877 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26878 reg_alloc_order [pos++] = i;
26880 /* Global general purpose registers. */
26881 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26882 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26883 reg_alloc_order [pos++] = i;
26885 /* x87 registers come first in case we are doing FP math
26886 using them. */
26887 if (!TARGET_SSE_MATH)
26888 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26889 reg_alloc_order [pos++] = i;
26891 /* SSE registers. */
26892 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26893 reg_alloc_order [pos++] = i;
26894 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26895 reg_alloc_order [pos++] = i;
26897 /* x87 registers. */
26898 if (TARGET_SSE_MATH)
26899 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26900 reg_alloc_order [pos++] = i;
26902 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26903 reg_alloc_order [pos++] = i;
26905 /* Initialize the rest of the array, as we do not allocate some registers
26906 at all. */
26907 while (pos < FIRST_PSEUDO_REGISTER)
26908 reg_alloc_order [pos++] = 0;
26911 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
26912 struct attribute_spec.handler. */
26914 ix86_handle_abi_attribute (tree *node, tree name,
26915 tree args ATTRIBUTE_UNUSED,
26916 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26918 if (TREE_CODE (*node) != FUNCTION_TYPE
26919 && TREE_CODE (*node) != METHOD_TYPE
26920 && TREE_CODE (*node) != FIELD_DECL
26921 && TREE_CODE (*node) != TYPE_DECL)
26923 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26925 *no_add_attrs = true;
26930 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26932 *no_add_attrs = true;
26936 /* Can combine regparm with all attributes but fastcall. */
26937 if (is_attribute_p ("ms_abi", name))
26939 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26941 error ("ms_abi and sysv_abi attributes are not compatible");
26946 else if (is_attribute_p ("sysv_abi", name))
26948 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26950 error ("ms_abi and sysv_abi attributes are not compatible");
26959 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26960 struct attribute_spec.handler. */
26962 ix86_handle_struct_attribute (tree *node, tree name,
26963 tree args ATTRIBUTE_UNUSED,
26964 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26967 if (DECL_P (*node))
26969 if (TREE_CODE (*node) == TYPE_DECL)
26970 type = &TREE_TYPE (*node);
26975 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26976 || TREE_CODE (*type) == UNION_TYPE)))
26978 warning (OPT_Wattributes, "%qE attribute ignored",
26980 *no_add_attrs = true;
26983 else if ((is_attribute_p ("ms_struct", name)
26984 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26985 || ((is_attribute_p ("gcc_struct", name)
26986 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26988 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26990 *no_add_attrs = true;
26997 ix86_handle_fndecl_attribute (tree *node, tree name,
26998 tree args ATTRIBUTE_UNUSED,
26999 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27001 if (TREE_CODE (*node) != FUNCTION_DECL)
27003 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27005 *no_add_attrs = true;
27009 #ifndef HAVE_AS_IX86_SWAP
27011 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
27018 ix86_ms_bitfield_layout_p (const_tree record_type)
27020 return (TARGET_MS_BITFIELD_LAYOUT &&
27021 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27022 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27025 /* Returns an expression indicating where the this parameter is
27026 located on entry to the FUNCTION. */
27029 x86_this_parameter (tree function)
27031 tree type = TREE_TYPE (function);
27032 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27037 const int *parm_regs;
27039 if (ix86_function_type_abi (type) == MS_ABI)
27040 parm_regs = x86_64_ms_abi_int_parameter_registers;
27042 parm_regs = x86_64_int_parameter_registers;
27043 return gen_rtx_REG (DImode, parm_regs[aggr]);
27046 nregs = ix86_function_regparm (type, function);
27048 if (nregs > 0 && !stdarg_p (type))
27052 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27053 regno = aggr ? DX_REG : CX_REG;
27054 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27058 return gen_rtx_MEM (SImode,
27059 plus_constant (stack_pointer_rtx, 4));
27068 return gen_rtx_MEM (SImode,
27069 plus_constant (stack_pointer_rtx, 4));
27072 return gen_rtx_REG (SImode, regno);
27075 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27078 /* Determine whether x86_output_mi_thunk can succeed. */
27081 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27082 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27083 HOST_WIDE_INT vcall_offset, const_tree function)
27085 /* 64-bit can handle anything. */
27089 /* For 32-bit, everything's fine if we have one free register. */
27090 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27093 /* Need a free register for vcall_offset. */
27097 /* Need a free register for GOT references. */
27098 if (flag_pic && !targetm.binds_local_p (function))
27101 /* Otherwise ok. */
27105 /* Output the assembler code for a thunk function. THUNK_DECL is the
27106 declaration for the thunk function itself, FUNCTION is the decl for
27107 the target function. DELTA is an immediate constant offset to be
27108 added to THIS. If VCALL_OFFSET is nonzero, the word at
27109 *(*this + vcall_offset) should be added to THIS. */
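/* Illustrative sketch: for a 32-bit thunk with DELTA == -4, no vcall
   offset and a stack-passed this pointer, the code below emits
   roughly

       subl $4, 4(%esp)
       jmp  function

   with the add/sub choice made by x86_maybe_negate_const_int.  */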
27112 x86_output_mi_thunk (FILE *file,
27113 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27114 HOST_WIDE_INT vcall_offset, tree function)
27117 rtx this_param = x86_this_parameter (function);
27120 /* Make sure unwind info is emitted for the thunk if needed. */
27121 final_start_function (emit_barrier (), file, 1);
27123 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27124 pull it in now and let DELTA benefit. */
27125 if (REG_P (this_param))
27126 this_reg = this_param;
27127 else if (vcall_offset)
27129 /* Put the this parameter into %eax. */
27130 xops[0] = this_param;
27131 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27132 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27135 this_reg = NULL_RTX;
27137 /* Adjust the this parameter by a fixed constant. */
27140 xops[0] = GEN_INT (delta);
27141 xops[1] = this_reg ? this_reg : this_param;
27144 if (!x86_64_general_operand (xops[0], DImode))
27146 tmp = gen_rtx_REG (DImode, R10_REG);
27148 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27150 xops[1] = this_param;
27152 if (x86_maybe_negate_const_int (&xops[0], DImode))
27153 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27155 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27157 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27158 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27160 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27163 /* Adjust the this parameter by a value stored in the vtable. */
27167 tmp = gen_rtx_REG (DImode, R10_REG);
27170 int tmp_regno = CX_REG;
27171 if (lookup_attribute ("fastcall",
27172 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27173 || lookup_attribute ("thiscall",
27174 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27175 tmp_regno = AX_REG;
27176 tmp = gen_rtx_REG (SImode, tmp_regno);
27179 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27181 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27183 /* Adjust the this parameter. */
27184 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27185 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27187 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27188 xops[0] = GEN_INT (vcall_offset);
27190 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27191 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27193 xops[1] = this_reg;
27194 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27197 /* If necessary, drop THIS back to its stack slot. */
27198 if (this_reg && this_reg != this_param)
27200 xops[0] = this_reg;
27201 xops[1] = this_param;
27202 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27205 xops[0] = XEXP (DECL_RTL (function), 0);
27208 if (!flag_pic || targetm.binds_local_p (function))
27209 output_asm_insn ("jmp\t%P0", xops);
27210 /* All thunks should be in the same object as their target,
27211 and thus binds_local_p should be true. */
27212 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27213 gcc_unreachable ();
27216 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27217 tmp = gen_rtx_CONST (Pmode, tmp);
27218 tmp = gen_rtx_MEM (QImode, tmp);
27220 output_asm_insn ("jmp\t%A0", xops);
27225 if (!flag_pic || targetm.binds_local_p (function))
27226 output_asm_insn ("jmp\t%P0", xops);
27231 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27232 if (TARGET_MACHO_BRANCH_ISLANDS)
27233 sym_ref = (gen_rtx_SYMBOL_REF
27235 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27236 tmp = gen_rtx_MEM (QImode, sym_ref);
27238 output_asm_insn ("jmp\t%0", xops);
27241 #endif /* TARGET_MACHO */
27243 tmp = gen_rtx_REG (SImode, CX_REG);
27244 output_set_got (tmp, NULL_RTX);
27247 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27248 output_asm_insn ("jmp\t{*}%1", xops);
27251 final_end_function ();
27255 x86_file_start (void)
27257 default_file_start ();
27259 darwin_file_start ();
27261 if (X86_FILE_START_VERSION_DIRECTIVE)
27262 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27263 if (X86_FILE_START_FLTUSED)
27264 fputs ("\t.global\t__fltused\n", asm_out_file);
27265 if (ix86_asm_dialect == ASM_INTEL)
27266 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27270 x86_field_alignment (tree field, int computed)
27272 enum machine_mode mode;
27273 tree type = TREE_TYPE (field);
27275 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27277 mode = TYPE_MODE (strip_array_types (type));
27278 if (mode == DFmode || mode == DCmode
27279 || GET_MODE_CLASS (mode) == MODE_INT
27280 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27281 return MIN (32, computed);
27285 /* Output assembler code to FILE to increment profiler label # LABELNO
27286 for profiling a function entry. */
27288 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27292 #ifndef NO_PROFILE_COUNTERS
27293 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27296 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27297 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
27299 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27303 #ifndef NO_PROFILE_COUNTERS
27304 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27307 fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
27311 #ifndef NO_PROFILE_COUNTERS
27312 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27315 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27319 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27320 /* We don't have exact information about the insn sizes, but we may assume
27321 quite safely that we are informed about all 1-byte insns and memory
27322 address sizes. This is enough to eliminate unnecessary padding in
27323 most cases. */
27326 min_insn_size (rtx insn)
27330 if (!INSN_P (insn) || !active_insn_p (insn))
27333 /* Discard alignments we've emitted, and jump table data. */
27334 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27335 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27337 if (JUMP_TABLE_DATA_P (insn))
27340 /* Important case - calls are always 5 bytes.
27341 It is common to have many calls in a row. */
27343 && symbolic_reference_mentioned_p (PATTERN (insn))
27344 && !SIBLING_CALL_P (insn))
27346 len = get_attr_length (insn);
27350 /* For normal instructions we rely on get_attr_length being exact,
27351 with a few exceptions. */
27352 if (!JUMP_P (insn))
27354 enum attr_type type = get_attr_type (insn);
27359 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27360 || asm_noperands (PATTERN (insn)) >= 0)
27367 /* Otherwise trust get_attr_length. */
27371 l = get_attr_length_address (insn);
27372 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27381 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27382 16-byte window. */
27385 ix86_avoid_jump_mispredicts (void)
27387 rtx insn, start = get_insns ();
27388 int nbytes = 0, njumps = 0;
27391 /* Look for all minimal intervals of instructions containing 4 jumps.
27392 The intervals are bounded by START and INSN. NBYTES is the total
27393 size of the instructions in the interval, including INSN and not including
27394 START. When NBYTES is smaller than 16 bytes, it is possible
27395 that the ends of START and INSN land in the same 16-byte window.
27397 The smallest offset in the window at which INSN can start is the case where
27398 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27399 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN). */
27401 for (insn = start; insn; insn = NEXT_INSN (insn))
27405 if (LABEL_P (insn))
27407 int align = label_to_alignment (insn);
27408 int max_skip = label_to_max_skip (insn);
27412 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27413 already in the current 16 byte page, because otherwise
27414 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27415 bytes to reach 16 byte boundary. */
27417 || (align <= 3 && max_skip != (1 << align) - 1))
27420 fprintf (dump_file, "Label %i with max_skip %i\n",
27421 INSN_UID (insn), max_skip);
27424 while (nbytes + max_skip >= 16)
27426 start = NEXT_INSN (start);
27427 if ((JUMP_P (start)
27428 && GET_CODE (PATTERN (start)) != ADDR_VEC
27429 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27431 njumps--, isjump = 1;
27434 nbytes -= min_insn_size (start);
27440 min_size = min_insn_size (insn);
27441 nbytes += min_size;
27443 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27444 INSN_UID (insn), min_size);
27446 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27447 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27455 start = NEXT_INSN (start);
27456 if ((JUMP_P (start)
27457 && GET_CODE (PATTERN (start)) != ADDR_VEC
27458 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27460 njumps--, isjump = 1;
27463 nbytes -= min_insn_size (start);
27465 gcc_assert (njumps >= 0);
27467 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27468 INSN_UID (start), INSN_UID (insn), nbytes);
27470 if (njumps == 3 && isjump && nbytes < 16)
27472 int padsize = 15 - nbytes + min_insn_size (insn);
27475 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27476 INSN_UID (insn), padsize);
27477 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27483 /* AMD Athlon works faster
27484 when RET is not the destination of a conditional jump or directly preceded
27485 by another jump instruction. We avoid the penalty by inserting a NOP just
27486 before the RET instructions in such cases. */
27488 ix86_pad_returns (void)
27493 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27495 basic_block bb = e->src;
27496 rtx ret = BB_END (bb);
27498 bool replace = false;
27500 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27501 || optimize_bb_for_size_p (bb))
27503 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27504 if (active_insn_p (prev) || LABEL_P (prev))
27506 if (prev && LABEL_P (prev))
27511 FOR_EACH_EDGE (e, ei, bb->preds)
27512 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27513 && !(e->flags & EDGE_FALLTHRU))
27518 prev = prev_active_insn (ret);
27520 && ((JUMP_P (prev) && any_condjump_p (prev))
27523 /* Empty functions get a branch mispredict even when the jump destination
27524 is not visible to us. */
27525 if (!prev && !optimize_function_for_size_p (cfun))
27530 emit_jump_insn_before (gen_return_internal_long (), ret);
27536 /* Implement machine-specific optimizations. We implement padding of returns
27537 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27541 if (optimize && optimize_function_for_speed_p (cfun))
27543 if (TARGET_PAD_RETURNS)
27544 ix86_pad_returns ();
27545 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27546 if (TARGET_FOUR_JUMP_LIMIT)
27547 ix86_avoid_jump_mispredicts ();
27552 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
27555 x86_extended_QIreg_mentioned_p (rtx insn)
27558 extract_insn_cached (insn);
27559 for (i = 0; i < recog_data.n_operands; i++)
27560 if (REG_P (recog_data.operand[i])
27561 && REGNO (recog_data.operand[i]) > BX_REG)
27566 /* Return nonzero when P points to a register encoded via a REX prefix.
27567 Called via for_each_rtx. */
27569 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27571 unsigned int regno;
27574 regno = REGNO (*p);
27575 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27578 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
27581 x86_extended_reg_mentioned_p (rtx insn)
27583 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27584 extended_reg_mentioned_1, NULL);
27587 /* If profitable, negate (without causing overflow) the integer constant
27588 of mode MODE at location LOC. Return true in this case. */
27590 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27594 if (!CONST_INT_P (*loc))
27600 /* DImode x86_64 constants must fit in 32 bits. */
27601 gcc_assert (x86_64_immediate_operand (*loc, mode));
27612 gcc_unreachable ();
27615 /* Avoid overflows. */
27616 if (mode_signbit_p (mode, *loc))
27619 val = INTVAL (*loc);
27621 /* Make things pretty and use `subl $4,%eax' rather than `addl $-4,%eax'.
27622 Exception: -128 encodes smaller than 128, so swap sign and operation. */
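     /* Example (for illustration): "addl $-128, %eax" fits in a sign-extended
        8-bit immediate, while "subl $128, %eax" would need a 32-bit immediate,
        so val == -128 is deliberately left as an addition.  */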
27623 if ((val < 0 && val != -128)
27626 *loc = GEN_INT (-val);
27633 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27634 optabs would emit if we didn't have TFmode patterns. */
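/* Roughly equivalent C, for illustration (shown for the DImode case; the
   function name is hypothetical, not from this file):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;
       // Halve the value, folding the discarded low bit back in so the
       // final rounding is still correct, then double the result.
       double f = (double) (int64_t) ((u >> 1) | (u & 1));
       return f + f;
     }  */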
27637 x86_emit_floatuns (rtx operands[2])
27639 rtx neglab, donelab, i0, i1, f0, in, out;
27640 enum machine_mode mode, inmode;
27642 inmode = GET_MODE (operands[1]);
27643 gcc_assert (inmode == SImode || inmode == DImode);
27646 in = force_reg (inmode, operands[1]);
27647 mode = GET_MODE (out);
27648 neglab = gen_label_rtx ();
27649 donelab = gen_label_rtx ();
27650 f0 = gen_reg_rtx (mode);
27652 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27654 expand_float (out, in, 0);
27656 emit_jump_insn (gen_jump (donelab));
27659 emit_label (neglab);
27661 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27663 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27665 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27667 expand_float (f0, i0, 0);
27669 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27671 emit_label (donelab);
27674 /* AVX does not support 32-byte integer vector operations,
27675 thus the longest vector we are faced with is V16QImode. */
27676 #define MAX_VECT_LEN 16
27678 struct expand_vec_perm_d
27680 rtx target, op0, op1;
27681 unsigned char perm[MAX_VECT_LEN];
27682 enum machine_mode vmode;
27683 unsigned char nelt;
27687 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27688 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27690 /* Get a vector mode of the same size as the original but with elements
27691 twice as wide. This is only guaranteed to apply to integral vectors. */
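/* For example (assuming the genmodes.c ordering noted below), V16QImode
   yields V8HImode: the same 16-byte size with half as many, twice as wide,
   elements.  */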
27693 static inline enum machine_mode
27694 get_mode_wider_vector (enum machine_mode o)
27696 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27697 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27698 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27699 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27703 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27704 with all elements equal to VAR. Return true if successful. */
27707 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27708 rtx target, rtx val)
27731 /* First attempt to recognize VAL as-is. */
27732 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27733 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27734 if (recog_memoized (insn) < 0)
27737 /* If that fails, force VAL into a register. */
27740 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27741 seq = get_insns ();
27744 emit_insn_before (seq, insn);
27746 ok = recog_memoized (insn) >= 0;
27755 if (TARGET_SSE || TARGET_3DNOW_A)
27759 val = gen_lowpart (SImode, val);
27760 x = gen_rtx_TRUNCATE (HImode, val);
27761 x = gen_rtx_VEC_DUPLICATE (mode, x);
27762 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27775 struct expand_vec_perm_d dperm;
27779 memset (&dperm, 0, sizeof (dperm));
27780 dperm.target = target;
27781 dperm.vmode = mode;
27782 dperm.nelt = GET_MODE_NUNITS (mode);
27783 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27785 /* Extend to SImode using a paradoxical SUBREG. */
27786 tmp1 = gen_reg_rtx (SImode);
27787 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27789 /* Insert the SImode value as low element of a V4SImode vector. */
27790 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27791 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27793 ok = (expand_vec_perm_1 (&dperm)
27794 || expand_vec_perm_broadcast_1 (&dperm));
27806 /* Replicate the value once into the next wider mode and recurse. */
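/* E.g. (illustration, values assumed): broadcasting the byte B into
   V8QImode first builds the HImode scalar (B << 8) | B and then broadcasts
   that into V4HImode, the wider vector mode of the same size.  */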
27808 enum machine_mode smode, wsmode, wvmode;
27811 smode = GET_MODE_INNER (mode);
27812 wvmode = get_mode_wider_vector (mode);
27813 wsmode = GET_MODE_INNER (wvmode);
27815 val = convert_modes (wsmode, smode, val, true);
27816 x = expand_simple_binop (wsmode, ASHIFT, val,
27817 GEN_INT (GET_MODE_BITSIZE (smode)),
27818 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27819 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27821 x = gen_lowpart (wvmode, target);
27822 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
27830 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
27831 rtx x = gen_reg_rtx (hvmode);
27833 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
27836 x = gen_rtx_VEC_CONCAT (mode, x, x);
27837 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27846 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27847 whose ONE_VAR element is VAR and whose other elements are zero. Return true if successful. */
27851 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27852 rtx target, rtx var, int one_var)
27854 enum machine_mode vsimode;
27857 bool use_vector_set = false;
27862 /* For SSE4.1, we normally use vector set. But if the second
27863 element is zero and inter-unit moves are OK, we use movq instead. */
27865 use_vector_set = (TARGET_64BIT
27867 && !(TARGET_INTER_UNIT_MOVES
27873 use_vector_set = TARGET_SSE4_1;
27876 use_vector_set = TARGET_SSE2;
27879 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27886 use_vector_set = TARGET_AVX;
27889 /* Use ix86_expand_vector_set in 64bit mode only. */
27890 use_vector_set = TARGET_AVX && TARGET_64BIT;
27896 if (use_vector_set)
27898 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27899 var = force_reg (GET_MODE_INNER (mode), var);
27900 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27916 var = force_reg (GET_MODE_INNER (mode), var);
27917 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27918 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27923 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27924 new_target = gen_reg_rtx (mode);
27926 new_target = target;
27927 var = force_reg (GET_MODE_INNER (mode), var);
27928 x = gen_rtx_VEC_DUPLICATE (mode, var);
27929 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27930 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27933 /* We need to shuffle the value to the correct position, so
27934 create a new pseudo to store the intermediate result. */
27936 /* With SSE2, we can use the integer shuffle insns. */
27937 if (mode != V4SFmode && TARGET_SSE2)
27939 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27941 GEN_INT (one_var == 1 ? 0 : 1),
27942 GEN_INT (one_var == 2 ? 0 : 1),
27943 GEN_INT (one_var == 3 ? 0 : 1)));
27944 if (target != new_target)
27945 emit_move_insn (target, new_target);
27949 /* Otherwise convert the intermediate result to V4SFmode and
27950 use the SSE1 shuffle instructions. */
27951 if (mode != V4SFmode)
27953 tmp = gen_reg_rtx (V4SFmode);
27954 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27959 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27961 GEN_INT (one_var == 1 ? 0 : 1),
27962 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27963 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27965 if (mode != V4SFmode)
27966 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27967 else if (tmp != target)
27968 emit_move_insn (target, tmp);
27970 else if (target != new_target)
27971 emit_move_insn (target, new_target);
27976 vsimode = V4SImode;
27982 vsimode = V2SImode;
27988 /* Zero extend the variable element to SImode and recurse. */
27989 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27991 x = gen_reg_rtx (vsimode);
27992 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27994 gcc_unreachable ();
27996 emit_move_insn (target, gen_lowpart (mode, x));
28004 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28005 consisting of the values in VALS. It is known that all elements
28006 except ONE_VAR are constants. Return true if successful. */
28009 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28010 rtx target, rtx vals, int one_var)
28012 rtx var = XVECEXP (vals, 0, one_var);
28013 enum machine_mode wmode;
28016 const_vec = copy_rtx (vals);
28017 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28018 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28026 /* For the two element vectors, it's just as easy to use
28027 the general case. */
28031 /* Use ix86_expand_vector_set in 64bit mode only. */
28054 /* There's no way to set one QImode entry easily. Combine
28055 the variable value with its adjacent constant value, and
28056 promote to an HImode set. */
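          /* Illustration (assumed values): to set byte 3 of a vector whose
             byte 2 holds the constant 0x12, build the HImode value
             (var << 8) | 0x12 and store it as word one_var >> 1 == 1.  */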
28057 x = XVECEXP (vals, 0, one_var ^ 1);
28060 var = convert_modes (HImode, QImode, var, true);
28061 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28062 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28063 x = GEN_INT (INTVAL (x) & 0xff);
28067 var = convert_modes (HImode, QImode, var, true);
28068 x = gen_int_mode (INTVAL (x) << 8, HImode);
28070 if (x != const0_rtx)
28071 var = expand_simple_binop (HImode, IOR, var, x, var,
28072 1, OPTAB_LIB_WIDEN);
28074 x = gen_reg_rtx (wmode);
28075 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28076 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28078 emit_move_insn (target, gen_lowpart (mode, x));
28085 emit_move_insn (target, const_vec);
28086 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28090 /* A subroutine of ix86_expand_vector_init_general. Use vector
28091 concatenate to handle the most general case: all values variable,
28092 and none identical. */
28095 ix86_expand_vector_init_concat (enum machine_mode mode,
28096 rtx target, rtx *ops, int n)
28098 enum machine_mode cmode, hmode = VOIDmode;
28099 rtx first[8], second[4];
28139 gcc_unreachable ();
28142 if (!register_operand (ops[1], cmode))
28143 ops[1] = force_reg (cmode, ops[1]);
28144 if (!register_operand (ops[0], cmode))
28145 ops[0] = force_reg (cmode, ops[0]);
28146 emit_insn (gen_rtx_SET (VOIDmode, target,
28147 gen_rtx_VEC_CONCAT (mode, ops[0],
28167 gcc_unreachable ();
28183 gcc_unreachable ();
28188 /* FIXME: We process inputs backward to help RA. PR 36222. */
28191 for (; i > 0; i -= 2, j--)
28193 first[j] = gen_reg_rtx (cmode);
28194 v = gen_rtvec (2, ops[i - 1], ops[i]);
28195 ix86_expand_vector_init (false, first[j],
28196 gen_rtx_PARALLEL (cmode, v));
28202 gcc_assert (hmode != VOIDmode);
28203 for (i = j = 0; i < n; i += 2, j++)
28205 second[j] = gen_reg_rtx (hmode);
28206 ix86_expand_vector_init_concat (hmode, second [j],
28210 ix86_expand_vector_init_concat (mode, target, second, n);
28213 ix86_expand_vector_init_concat (mode, target, first, n);
28217 gcc_unreachable ();
28221 /* A subroutine of ix86_expand_vector_init_general. Use vector
28222 interleave to handle the most general case: all values variable,
28223 and none identical. */
28226 ix86_expand_vector_init_interleave (enum machine_mode mode,
28227 rtx target, rtx *ops, int n)
28229 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28232 rtx (*gen_load_even) (rtx, rtx, rtx);
28233 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28234 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28239 gen_load_even = gen_vec_setv8hi;
28240 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28241 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28242 inner_mode = HImode;
28243 first_imode = V4SImode;
28244 second_imode = V2DImode;
28245 third_imode = VOIDmode;
28248 gen_load_even = gen_vec_setv16qi;
28249 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28250 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28251 inner_mode = QImode;
28252 first_imode = V8HImode;
28253 second_imode = V4SImode;
28254 third_imode = V2DImode;
28257 gcc_unreachable ();
28260 for (i = 0; i < n; i++)
28262 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28263 op0 = gen_reg_rtx (SImode);
28264 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28266 /* Insert the SImode value as low element of V4SImode vector. */
28267 op1 = gen_reg_rtx (V4SImode);
28268 op0 = gen_rtx_VEC_MERGE (V4SImode,
28269 gen_rtx_VEC_DUPLICATE (V4SImode,
28271 CONST0_RTX (V4SImode),
28273 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28275 /* Cast the V4SImode vector back to a vector in the original mode. */
28276 op0 = gen_reg_rtx (mode);
28277 emit_move_insn (op0, gen_lowpart (mode, op1));
28279 /* Load even elements into the second position. */
28280 emit_insn (gen_load_even (op0,
28281 force_reg (inner_mode,
28285 /* Cast vector to FIRST_IMODE vector. */
28286 ops[i] = gen_reg_rtx (first_imode);
28287 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28290 /* Interleave low FIRST_IMODE vectors. */
28291 for (i = j = 0; i < n; i += 2, j++)
28293 op0 = gen_reg_rtx (first_imode);
28294 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28296 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28297 ops[j] = gen_reg_rtx (second_imode);
28298 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28301 /* Interleave low SECOND_IMODE vectors. */
28302 switch (second_imode)
28305 for (i = j = 0; i < n / 2; i += 2, j++)
28307 op0 = gen_reg_rtx (second_imode);
28308 emit_insn (gen_interleave_second_low (op0, ops[i],
28311 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
28313 ops[j] = gen_reg_rtx (third_imode);
28314 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28316 second_imode = V2DImode;
28317 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28321 op0 = gen_reg_rtx (second_imode);
28322 emit_insn (gen_interleave_second_low (op0, ops[0],
28325 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
28327 emit_insn (gen_rtx_SET (VOIDmode, target,
28328 gen_lowpart (mode, op0)));
28332 gcc_unreachable ();
28336 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28337 all values variable, and none identical. */
28340 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28341 rtx target, rtx vals)
28343 rtx ops[32], op0, op1;
28344 enum machine_mode half_mode = VOIDmode;
28351 if (!mmx_ok && !TARGET_SSE)
28363 n = GET_MODE_NUNITS (mode);
28364 for (i = 0; i < n; i++)
28365 ops[i] = XVECEXP (vals, 0, i);
28366 ix86_expand_vector_init_concat (mode, target, ops, n);
28370 half_mode = V16QImode;
28374 half_mode = V8HImode;
28378 n = GET_MODE_NUNITS (mode);
28379 for (i = 0; i < n; i++)
28380 ops[i] = XVECEXP (vals, 0, i);
28381 op0 = gen_reg_rtx (half_mode);
28382 op1 = gen_reg_rtx (half_mode);
28383 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28385 ix86_expand_vector_init_interleave (half_mode, op1,
28386 &ops [n >> 1], n >> 2);
28387 emit_insn (gen_rtx_SET (VOIDmode, target,
28388 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28392 if (!TARGET_SSE4_1)
28400 /* Don't use ix86_expand_vector_init_interleave if we can't
28401 move from GPR to SSE register directly. */
28402 if (!TARGET_INTER_UNIT_MOVES)
28405 n = GET_MODE_NUNITS (mode);
28406 for (i = 0; i < n; i++)
28407 ops[i] = XVECEXP (vals, 0, i);
28408 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28416 gcc_unreachable ();
28420 int i, j, n_elts, n_words, n_elt_per_word;
28421 enum machine_mode inner_mode;
28422 rtx words[4], shift;
28424 inner_mode = GET_MODE_INNER (mode);
28425 n_elts = GET_MODE_NUNITS (mode);
28426 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28427 n_elt_per_word = n_elts / n_words;
28428 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28430 for (i = 0; i < n_words; ++i)
28432 rtx word = NULL_RTX;
28434 for (j = 0; j < n_elt_per_word; ++j)
28436 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28437 elt = convert_modes (word_mode, inner_mode, elt, true);
28443 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28444 word, 1, OPTAB_LIB_WIDEN);
28445 word = expand_simple_binop (word_mode, IOR, word, elt,
28446 word, 1, OPTAB_LIB_WIDEN);
28454 emit_move_insn (target, gen_lowpart (mode, words[0]));
28455 else if (n_words == 2)
28457 rtx tmp = gen_reg_rtx (mode);
28458 emit_clobber (tmp);
28459 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28460 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28461 emit_move_insn (target, tmp);
28463 else if (n_words == 4)
28465 rtx tmp = gen_reg_rtx (V4SImode);
28466 gcc_assert (word_mode == SImode);
28467 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28468 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28469 emit_move_insn (target, gen_lowpart (mode, tmp));
28472 gcc_unreachable ();
28476 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28477 instructions unless MMX_OK is true. */
28480 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28482 enum machine_mode mode = GET_MODE (target);
28483 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28484 int n_elts = GET_MODE_NUNITS (mode);
28485 int n_var = 0, one_var = -1;
28486 bool all_same = true, all_const_zero = true;
28490 for (i = 0; i < n_elts; ++i)
28492 x = XVECEXP (vals, 0, i);
28493 if (!(CONST_INT_P (x)
28494 || GET_CODE (x) == CONST_DOUBLE
28495 || GET_CODE (x) == CONST_FIXED))
28496 n_var++, one_var = i;
28497 else if (x != CONST0_RTX (inner_mode))
28498 all_const_zero = false;
28499 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28503 /* Constants are best loaded from the constant pool. */
28506 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28510 /* If all values are identical, broadcast the value. */
28512 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28513 XVECEXP (vals, 0, 0)))
28516 /* Values where only one field is non-constant are best loaded from
28517 the pool and overwritten via move later. */
28521 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28522 XVECEXP (vals, 0, one_var),
28526 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28530 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28534 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28536 enum machine_mode mode = GET_MODE (target);
28537 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28538 enum machine_mode half_mode;
28539 bool use_vec_merge = false;
28541 static rtx (*gen_extract[6][2]) (rtx, rtx)
28543 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28544 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28545 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28546 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28547 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28548 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28550 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28552 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28553 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28554 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28555 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28556 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28557 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28567 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28568 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28570 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28572 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28573 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28579 use_vec_merge = TARGET_SSE4_1;
28587 /* For the two element vectors, we implement a VEC_CONCAT with
28588 the extraction of the other element. */
28590 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28591 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28594 op0 = val, op1 = tmp;
28596 op0 = tmp, op1 = val;
28598 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28599 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28604 use_vec_merge = TARGET_SSE4_1;
28611 use_vec_merge = true;
28615 /* tmp = target = A B C D */
28616 tmp = copy_to_reg (target);
28617 /* target = A A B B */
28618 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28619 /* target = X A B B */
28620 ix86_expand_vector_set (false, target, val, 0);
28621 /* target = A X C D */
28622 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28623 const1_rtx, const0_rtx,
28624 GEN_INT (2+4), GEN_INT (3+4)));
28628 /* tmp = target = A B C D */
28629 tmp = copy_to_reg (target);
28630 /* tmp = X B C D */
28631 ix86_expand_vector_set (false, tmp, val, 0);
28632 /* target = A B X D */
28633 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28634 const0_rtx, const1_rtx,
28635 GEN_INT (0+4), GEN_INT (3+4)));
28639 /* tmp = target = A B C D */
28640 tmp = copy_to_reg (target);
28641 /* tmp = X B C D */
28642 ix86_expand_vector_set (false, tmp, val, 0);
28643 /* target = A B C X */
28644 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28645 const0_rtx, const1_rtx,
28646 GEN_INT (2+4), GEN_INT (0+4)));
28650 gcc_unreachable ();
28655 use_vec_merge = TARGET_SSE4_1;
28659 /* Element 0 is handled by the vec_merge below. */
28662 use_vec_merge = true;
28668 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28669 store into element 0, then shuffle them back. */
28673 order[0] = GEN_INT (elt);
28674 order[1] = const1_rtx;
28675 order[2] = const2_rtx;
28676 order[3] = GEN_INT (3);
28677 order[elt] = const0_rtx;
28679 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28680 order[1], order[2], order[3]));
28682 ix86_expand_vector_set (false, target, val, 0);
28684 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28685 order[1], order[2], order[3]));
28689 /* For SSE1, we have to reuse the V4SF code. */
28690 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28691 gen_lowpart (SFmode, val), elt);
28696 use_vec_merge = TARGET_SSE2;
28699 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28703 use_vec_merge = TARGET_SSE4_1;
28710 half_mode = V16QImode;
28716 half_mode = V8HImode;
28722 half_mode = V4SImode;
28728 half_mode = V2DImode;
28734 half_mode = V4SFmode;
28740 half_mode = V2DFmode;
28746 /* Compute offset. */
28750 gcc_assert (i <= 1);
28752 /* Extract the half. */
28753 tmp = gen_reg_rtx (half_mode);
28754 emit_insn (gen_extract[j][i] (tmp, target));
28756 /* Put val in tmp at elt. */
28757 ix86_expand_vector_set (false, tmp, val, elt);
28760 emit_insn (gen_insert[j][i] (target, target, tmp));
28769 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28770 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28771 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28775 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28777 emit_move_insn (mem, target);
28779 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28780 emit_move_insn (tmp, val);
28782 emit_move_insn (target, mem);
28787 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28789 enum machine_mode mode = GET_MODE (vec);
28790 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28791 bool use_vec_extr = false;
28804 use_vec_extr = true;
28808 use_vec_extr = TARGET_SSE4_1;
28820 tmp = gen_reg_rtx (mode);
28821 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28822 GEN_INT (elt), GEN_INT (elt),
28823 GEN_INT (elt+4), GEN_INT (elt+4)));
28827 tmp = gen_reg_rtx (mode);
28828 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
28832 gcc_unreachable ();
28835 use_vec_extr = true;
28840 use_vec_extr = TARGET_SSE4_1;
28854 tmp = gen_reg_rtx (mode);
28855 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28856 GEN_INT (elt), GEN_INT (elt),
28857 GEN_INT (elt), GEN_INT (elt)));
28861 tmp = gen_reg_rtx (mode);
28862 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28866 gcc_unreachable ();
28869 use_vec_extr = true;
28874 /* For SSE1, we have to reuse the V4SF code. */
28875 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28876 gen_lowpart (V4SFmode, vec), elt);
28882 use_vec_extr = TARGET_SSE2;
28885 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28889 use_vec_extr = TARGET_SSE4_1;
28893 /* ??? Could extract the appropriate HImode element and shift. */
28900 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28901 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28903 /* Let the rtl optimizers know about the zero extension performed. */
28904 if (inner_mode == QImode || inner_mode == HImode)
28906 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28907 target = gen_lowpart (SImode, target);
28910 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28914 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28916 emit_move_insn (mem, vec);
28918 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28919 emit_move_insn (target, tmp);
28923 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28924 pattern to reduce; DEST is the destination; IN is the input vector. */
28927 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28929 rtx tmp1, tmp2, tmp3;
28931 tmp1 = gen_reg_rtx (V4SFmode);
28932 tmp2 = gen_reg_rtx (V4SFmode);
28933 tmp3 = gen_reg_rtx (V4SFmode);
28935 emit_insn (gen_sse_movhlps (tmp1, in, in));
28936 emit_insn (fn (tmp2, tmp1, in));
28938 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28939 const1_rtx, const1_rtx,
28940 GEN_INT (1+4), GEN_INT (1+4)));
28941 emit_insn (fn (dest, tmp2, tmp3));
28944 /* Target hook for scalar_mode_supported_p. */
28946 ix86_scalar_mode_supported_p (enum machine_mode mode)
28948 if (DECIMAL_FLOAT_MODE_P (mode))
28949 return default_decimal_float_supported_p ();
28950 else if (mode == TFmode)
28953 return default_scalar_mode_supported_p (mode);
28956 /* Implements target hook vector_mode_supported_p. */
28958 ix86_vector_mode_supported_p (enum machine_mode mode)
28960 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28962 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28964 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28966 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28968 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28973 /* Target hook for c_mode_for_suffix. */
28974 static enum machine_mode
28975 ix86_c_mode_for_suffix (char suffix)
28985 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28987 We do this in the new i386 backend to maintain source compatibility
28988 with the old cc0-based compiler. */
28991 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28992 tree inputs ATTRIBUTE_UNUSED,
28995 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28997 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29002 /* Implements the target hook targetm.asm.encode_section_info. This
29003 is not used by NetWare. */
29005 static void ATTRIBUTE_UNUSED
29006 ix86_encode_section_info (tree decl, rtx rtl, int first)
29008 default_encode_section_info (decl, rtl, first);
29010 if (TREE_CODE (decl) == VAR_DECL
29011 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29012 && ix86_in_large_data_p (decl))
29013 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29016 /* Worker function for REVERSE_CONDITION. */
29019 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29021 return (mode != CCFPmode && mode != CCFPUmode
29022 ? reverse_condition (code)
29023 : reverse_condition_maybe_unordered (code));
29026 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29030 output_387_reg_move (rtx insn, rtx *operands)
29032 if (REG_P (operands[0]))
29034 if (REG_P (operands[1])
29035 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29037 if (REGNO (operands[0]) == FIRST_STACK_REG)
29038 return output_387_ffreep (operands, 0);
29039 return "fstp\t%y0";
29041 if (STACK_TOP_P (operands[0]))
29042 return "fld%Z1\t%y1";
29045 else if (MEM_P (operands[0]))
29047 gcc_assert (REG_P (operands[1]));
29048 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29049 return "fstp%Z0\t%y0";
29052 /* There is no non-popping store to memory for XFmode.
29053 So if we need one, follow the store with a load. */
29054 if (GET_MODE (operands[0]) == XFmode)
29055 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29057 return "fst%Z0\t%y0";
29064 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
29065 FP status register is set. */
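/* Roughly the sequence emitted on targets without SAHF, for illustration:

     fnstsw %ax            # copy the FP status word into %ax
     testb  $0x04, %ah     # C2 is bit 10 of the status word
     jne    label

   With SAHF available, the flags are loaded directly and the jump tests
   the resulting unordered condition.  */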
29068 ix86_emit_fp_unordered_jump (rtx label)
29070 rtx reg = gen_reg_rtx (HImode);
29073 emit_insn (gen_x86_fnstsw_1 (reg));
29075 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29077 emit_insn (gen_x86_sahf_1 (reg));
29079 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29080 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29084 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29086 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29087 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29090 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29091 gen_rtx_LABEL_REF (VOIDmode, label),
29093 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29095 emit_jump_insn (temp);
29096 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29099 /* Output code to perform a log1p XFmode calculation. */
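/* Roughly equivalent pseudo-C, for illustration (the threshold is
   1 - sqrt(2)/2, the documented argument bound of fyl2xp1; log2_1p is a
   hypothetical name):

     if (fabsl (x) >= 0.29289321881345247561810596348408353L)
       result = ln2 * log2 (1.0L + x);    // fldln2; fyl2x
     else
       result = ln2 * log2_1p (x);        // fldln2; fyl2xp1

   fyl2xp1 computes y * log2 (x + 1) without rounding 1 + x first, which
   preserves accuracy for x near zero.  */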
29101 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29103 rtx label1 = gen_label_rtx ();
29104 rtx label2 = gen_label_rtx ();
29106 rtx tmp = gen_reg_rtx (XFmode);
29107 rtx tmp2 = gen_reg_rtx (XFmode);
29110 emit_insn (gen_absxf2 (tmp, op1));
29111 test = gen_rtx_GE (VOIDmode, tmp,
29112 CONST_DOUBLE_FROM_REAL_VALUE (
29113 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29115 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29117 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29118 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29119 emit_jump (label2);
29121 emit_label (label1);
29122 emit_move_insn (tmp, CONST1_RTX (XFmode));
29123 emit_insn (gen_addxf3 (tmp, op1, tmp));
29124 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29125 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29127 emit_label (label2);
29130 /* Output code to perform a Newton-Raphson approximation of a single precision
29131 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29133 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29135 rtx x0, x1, e0, e1, two;
29137 x0 = gen_reg_rtx (mode);
29138 e0 = gen_reg_rtx (mode);
29139 e1 = gen_reg_rtx (mode);
29140 x1 = gen_reg_rtx (mode);
29142 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29144 if (VECTOR_MODE_P (mode))
29145 two = ix86_build_const_vector (SFmode, true, two);
29147 two = force_reg (mode, two);
29149 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
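  /* One Newton-Raphson step roughly doubles the ~12 bits of accuracy of the
     hardware reciprocal estimate, close to SFmode's 24-bit significand; the
     result is still not correctly rounded, so this expansion is only
     suitable when relaxed precision is acceptable.  */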
29151 /* x0 = rcp(b) estimate */
29152 emit_insn (gen_rtx_SET (VOIDmode, x0,
29153 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29156 emit_insn (gen_rtx_SET (VOIDmode, e0,
29157 gen_rtx_MULT (mode, x0, a)));
29159 emit_insn (gen_rtx_SET (VOIDmode, e1,
29160 gen_rtx_MULT (mode, x0, b)));
29162 emit_insn (gen_rtx_SET (VOIDmode, x1,
29163 gen_rtx_MINUS (mode, two, e1)));
29164 /* res = e0 * x1 */
29165 emit_insn (gen_rtx_SET (VOIDmode, res,
29166 gen_rtx_MULT (mode, e0, x1)));
29169 /* Output code to perform a Newton-Raphson approximation of a
29170 single precision floating point [reciprocal] square root. */
29172 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29175 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29178 x0 = gen_reg_rtx (mode);
29179 e0 = gen_reg_rtx (mode);
29180 e1 = gen_reg_rtx (mode);
29181 e2 = gen_reg_rtx (mode);
29182 e3 = gen_reg_rtx (mode);
29184 real_from_integer (&r, VOIDmode, -3, -1, 0);
29185 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29187 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29188 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29190 if (VECTOR_MODE_P (mode))
29192 mthree = ix86_build_const_vector (SFmode, true, mthree);
29193 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29196 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29197 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
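  /* This is the standard Newton-Raphson step for f (y) = 1/y**2 - a,
     i.e. y1 = y0 * (3 - a*y0*y0) / 2, written with negated constants
     (-3, -0.5) so it maps directly onto the multiply and add patterns
     emitted below.  */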
29199 /* x0 = rsqrt(a) estimate */
29200 emit_insn (gen_rtx_SET (VOIDmode, x0,
29201 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29204 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
29209 zero = gen_reg_rtx (mode);
29210 mask = gen_reg_rtx (mode);
29212 zero = force_reg (mode, CONST0_RTX(mode));
29213 emit_insn (gen_rtx_SET (VOIDmode, mask,
29214 gen_rtx_NE (mode, zero, a)));
29216 emit_insn (gen_rtx_SET (VOIDmode, x0,
29217 gen_rtx_AND (mode, x0, mask)));
29221 emit_insn (gen_rtx_SET (VOIDmode, e0,
29222 gen_rtx_MULT (mode, x0, a)));
29224 emit_insn (gen_rtx_SET (VOIDmode, e1,
29225 gen_rtx_MULT (mode, e0, x0)));
29228 mthree = force_reg (mode, mthree);
29229 emit_insn (gen_rtx_SET (VOIDmode, e2,
29230 gen_rtx_PLUS (mode, e1, mthree)));
29232 mhalf = force_reg (mode, mhalf);
29234 /* e3 = -.5 * x0 */
29235 emit_insn (gen_rtx_SET (VOIDmode, e3,
29236 gen_rtx_MULT (mode, x0, mhalf)));
29238 /* e3 = -.5 * e0 */
29239 emit_insn (gen_rtx_SET (VOIDmode, e3,
29240 gen_rtx_MULT (mode, e0, mhalf)));
29241 /* ret = e2 * e3 */
29242 emit_insn (gen_rtx_SET (VOIDmode, res,
29243 gen_rtx_MULT (mode, e2, e3)));
29246 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29248 static void ATTRIBUTE_UNUSED
29249 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29252 /* With Binutils 2.15, the "@unwind" marker must be specified on
29253 every occurrence of the ".eh_frame" section, not just the first one. */
29256 && strcmp (name, ".eh_frame") == 0)
29258 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29259 flags & SECTION_WRITE ? "aw" : "a");
29262 default_elf_asm_named_section (name, flags, decl);
29265 /* Return the mangling of TYPE if it is an extended fundamental type. */
29267 static const char *
29268 ix86_mangle_type (const_tree type)
29270 type = TYPE_MAIN_VARIANT (type);
29272 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29273 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29276 switch (TYPE_MODE (type))
29279 /* __float128 is "g". */
29282 /* "long double" or __float80 is "e". */
29289 /* For 32-bit code we can save PIC register setup by using
29290 __stack_chk_fail_local hidden function instead of calling
29291 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29292 register, so it is better to call __stack_chk_fail directly. */
29295 ix86_stack_protect_fail (void)
29297 return TARGET_64BIT
29298 ? default_external_stack_protect_fail ()
29299 : default_hidden_stack_protect_fail ();
29302 /* Select a format to encode pointers in exception handling data. CODE
29303 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29304 true if the symbol may be affected by dynamic relocations.
29306 ??? All x86 object file formats are capable of representing this.
29307 After all, the relocation needed is the same as for the call insn.
29308 Whether or not a particular assembler allows us to enter such, I
29309 guess we'll have to see. */
29311 asm_preferred_eh_data_format (int code, int global)
29315 int type = DW_EH_PE_sdata8;
29317 || ix86_cmodel == CM_SMALL_PIC
29318 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29319 type = DW_EH_PE_sdata4;
29320 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29322 if (ix86_cmodel == CM_SMALL
29323 || (ix86_cmodel == CM_MEDIUM && code))
29324 return DW_EH_PE_udata4;
29325 return DW_EH_PE_absptr;
29328 /* Expand copysign from SIGN to the positive value ABS_VALUE,
29329 storing into RESULT. If MASK is non-null, it shall be a mask to mask out the sign-bit. */
29332 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29334 enum machine_mode mode = GET_MODE (sign);
29335 rtx sgn = gen_reg_rtx (mode);
29336 if (mask == NULL_RTX)
29338 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29339 if (!VECTOR_MODE_P (mode))
29341 /* We need to generate a scalar mode mask in this case. */
29342 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29343 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29344 mask = gen_reg_rtx (mode);
29345 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29349 mask = gen_rtx_NOT (mode, mask);
29350 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29351 gen_rtx_AND (mode, mask, sign)));
29352 emit_insn (gen_rtx_SET (VOIDmode, result,
29353 gen_rtx_IOR (mode, abs_value, sgn)));
29356 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29357 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
29360 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29362 enum machine_mode mode = GET_MODE (op0);
29365 xa = gen_reg_rtx (mode);
29366 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29367 if (!VECTOR_MODE_P (mode))
29369 /* We need to generate a scalar mode mask in this case. */
29370 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29371 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29372 mask = gen_reg_rtx (mode);
29373 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29375 emit_insn (gen_rtx_SET (VOIDmode, xa,
29376 gen_rtx_AND (mode, op0, mask)));
29384 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29385 swapping the operands if SWAP_OPERANDS is true. The expanded
29386 code is a forward jump to a newly created label in case the
29387 comparison is true. The generated label rtx is returned. */
29389 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29390 bool swap_operands)
29401 label = gen_label_rtx ();
29402 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29403 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29404 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29405 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29406 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29407 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29408 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29409 JUMP_LABEL (tmp) = label;
29414 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29415 using comparison code CODE. Operands are swapped for the comparison if
29416 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
29418 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29419 bool swap_operands)
29421 enum machine_mode mode = GET_MODE (op0);
29422 rtx mask = gen_reg_rtx (mode);
29431 if (mode == DFmode)
29432 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29433 gen_rtx_fmt_ee (code, mode, op0, op1)));
29435 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29436 gen_rtx_fmt_ee (code, mode, op0, op1)));
29441 /* Generate and return an rtx of mode MODE for 2**n where n is the number
29442 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29444 ix86_gen_TWO52 (enum machine_mode mode)
29446 REAL_VALUE_TYPE TWO52r;
29449 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29450 TWO52 = const_double_from_real_value (TWO52r, mode);
29451 TWO52 = force_reg (mode, TWO52);
29456 /* Expand SSE sequence for computing lround from OP1, storing into OP0. */
29459 ix86_expand_lround (rtx op0, rtx op1)
29461 /* C code for the stuff we're doing below:
29462 tmp = op1 + copysign (nextafter (0.5, 0.0), op1); return (long)tmp; */
29465 enum machine_mode mode = GET_MODE (op1);
29466 const struct real_format *fmt;
29467 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29470 /* load nextafter (0.5, 0.0) */
29471 fmt = REAL_MODE_FORMAT (mode);
29472 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29473 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
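  /* E.g. for DFmode (p == 53) pred_half is 0.5 - 2**-54, the largest double
     strictly below one half, so adding it never rounds a value just under
     .5 up to the next integer the way adding exactly 0.5 would.  */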
29475 /* adj = copysign (0.5, op1) */
29476 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29477 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29479 /* adj = op1 + adj */
29480 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29482 /* op0 = (imode)adj */
29483 expand_fix (op0, adj, 0);
29486 /* Expand SSE2 sequence for computing lfloor or lceil from OP1, storing into OP0. */
29489 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29491 /* C code for the stuff we're doing below (for do_floor):
29492 xi = (long)op1;
29493 xi -= (double)xi > op1 ? 1 : 0; return xi; */
29496 enum machine_mode fmode = GET_MODE (op1);
29497 enum machine_mode imode = GET_MODE (op0);
29498 rtx ireg, freg, label, tmp;
29500 /* reg = (long)op1 */
29501 ireg = gen_reg_rtx (imode);
29502 expand_fix (ireg, op1, 0);
29504 /* freg = (double)reg */
29505 freg = gen_reg_rtx (fmode);
29506 expand_float (freg, ireg, 0);
29508 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29509 label = ix86_expand_sse_compare_and_jump (UNLE,
29510 freg, op1, !do_floor);
29511 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29512 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29513 emit_move_insn (ireg, tmp);
29515 emit_label (label);
29516 LABEL_NUSES (label) = 1;
29518 emit_move_insn (op0, ireg);
29521 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29522 result in OPERAND0. */
29524 ix86_expand_rint (rtx operand0, rtx operand1)
29526 /* C code for the stuff we're doing below:
29527 xa = fabs (operand1);
29528 if (!isless (xa, 2**52)) return operand1;
29530 xa = xa + 2**52 - 2**52;
29531 return copysign (xa, operand1); */
29533 enum machine_mode mode = GET_MODE (operand0);
29534 rtx res, xa, label, TWO52, mask;
29536 res = gen_reg_rtx (mode);
29537 emit_move_insn (res, operand1);
29539 /* xa = abs (operand1) */
29540 xa = ix86_expand_sse_fabs (res, &mask);
29542 /* if (!isless (xa, TWO52)) goto label; */
29543 TWO52 = ix86_gen_TWO52 (mode);
29544 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29546 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29547 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29549 ix86_sse_copysign_to_positive (res, xa, res, mask);
29551 emit_label (label);
29552 LABEL_NUSES (label) = 1;
29554 emit_move_insn (operand0, res);
29557 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29560 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29562 /* C code for the stuff we expand below.
29563 double xa = fabs (x), x2;
29564 if (!isless (xa, TWO52)) return x;
29566 xa = xa + TWO52 - TWO52;
29567 x2 = copysign (xa, x);
29568 Compensate: floor: if (x2 > x) x2 -= 1; ceil: if (x2 < x) x2 += 1;
29574 return x2; */
29576 enum machine_mode mode = GET_MODE (operand0);
29577 rtx xa, TWO52, tmp, label, one, res, mask;
29579 TWO52 = ix86_gen_TWO52 (mode);
29581 /* Temporary for holding the result, initialized to the input
29582 operand to ease control flow. */
29583 res = gen_reg_rtx (mode);
29584 emit_move_insn (res, operand1);
29586 /* xa = abs (operand1) */
29587 xa = ix86_expand_sse_fabs (res, &mask);
29589 /* if (!isless (xa, TWO52)) goto label; */
29590 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29592 /* xa = xa + TWO52 - TWO52; */
29593 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29594 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29596 /* xa = copysign (xa, operand1) */
29597 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29599 /* generate 1.0 or -1.0 */
29600 one = force_reg (mode,
29601 const_double_from_real_value (do_floor
29602 ? dconst1 : dconstm1, mode));
29604 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29605 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29606 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29607 gen_rtx_AND (mode, one, tmp)));
29608 /* We always need to subtract here to preserve signed zero. */
29609 tmp = expand_simple_binop (mode, MINUS,
29610 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29611 emit_move_insn (res, tmp);
29613 emit_label (label);
29614 LABEL_NUSES (label) = 1;
29616 emit_move_insn (operand0, res);
29619 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29622 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29624 /* C code for the stuff we expand below.
29625 double xa = fabs (x), x2;
29626 if (!isless (xa, TWO52)) return x;
29628 x2 = (double)(long)x;
29629 Compensate: floor: if (x2 > x) x2 -= 1; ceil: if (x2 < x) x2 += 1;
29635 if (HONOR_SIGNED_ZEROS (mode))
29636 return copysign (x2, x);
29637 return x2; */
29639 enum machine_mode mode = GET_MODE (operand0);
29640 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29642 TWO52 = ix86_gen_TWO52 (mode);
29644 /* Temporary for holding the result, initialized to the input
29645 operand to ease control flow. */
29646 res = gen_reg_rtx (mode);
29647 emit_move_insn (res, operand1);
29649 /* xa = abs (operand1) */
29650 xa = ix86_expand_sse_fabs (res, &mask);
29652 /* if (!isless (xa, TWO52)) goto label; */
29653 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29655 /* xa = (double)(long)x */
29656 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29657 expand_fix (xi, res, 0);
29658 expand_float (xa, xi, 0);
29661 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29663 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29664 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29665 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29666 gen_rtx_AND (mode, one, tmp)));
29667 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29668 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29669 emit_move_insn (res, tmp);
29671 if (HONOR_SIGNED_ZEROS (mode))
29672 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29674 emit_label (label);
29675 LABEL_NUSES (label) = 1;
29677 emit_move_insn (operand0, res);
29680 /* Expand SSE sequence for computing round from OPERAND1 storing
29681 into OPERAND0. The sequence works without relying on DImode truncation
29682 via cvttsd2siq, which is only available on 64-bit targets. */
29684 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29686 /* C code for the stuff we expand below.
29687 double xa = fabs (x), xa2, x2;
29688 if (!isless (xa, TWO52)) return x;
29690 Using the absolute value and copying back sign makes
29691 -0.0 -> -0.0 correct.
29692 xa2 = xa + TWO52 - TWO52;
29693 dxa = xa2 - xa;
29696 if (dxa <= -0.5) xa2 += 1;
29697 else if (dxa > 0.5) xa2 -= 1;
29699 x2 = copysign (xa2, x);
29700 return x2; */
29702 enum machine_mode mode = GET_MODE (operand0);
29703 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29705 TWO52 = ix86_gen_TWO52 (mode);
29707 /* Temporary for holding the result, initialized to the input
29708 operand to ease control flow. */
29709 res = gen_reg_rtx (mode);
29710 emit_move_insn (res, operand1);
29712 /* xa = abs (operand1) */
29713 xa = ix86_expand_sse_fabs (res, &mask);
29715 /* if (!isless (xa, TWO52)) goto label; */
29716 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29718 /* xa2 = xa + TWO52 - TWO52; */
29719 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29720 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29722 /* dxa = xa2 - xa; */
29723 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29725 /* generate 0.5, 1.0 and -0.5 */
29726 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29727 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29728 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29732 tmp = gen_reg_rtx (mode);
29733 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29734 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29735 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29736 gen_rtx_AND (mode, one, tmp)));
29737 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29738 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29739 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29740 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29741 gen_rtx_AND (mode, one, tmp)));
29742 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29744 /* res = copysign (xa2, operand1) */
29745 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29747 emit_label (label);
29748 LABEL_NUSES (label) = 1;
29750 emit_move_insn (operand0, res);
29753 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
29756 ix86_expand_trunc (rtx operand0, rtx operand1)
29758 /* C code for SSE variant we expand below.
29759 double xa = fabs (x), x2;
29760 if (!isless (xa, TWO52)) return x;
29762 x2 = (double)(long)x;
29763 if (HONOR_SIGNED_ZEROS (mode))
29764 return copysign (x2, x);
29765 return x2; */
29767 enum machine_mode mode = GET_MODE (operand0);
29768 rtx xa, xi, TWO52, label, res, mask;
29770 TWO52 = ix86_gen_TWO52 (mode);
29772 /* Temporary for holding the result, initialized to the input
29773 operand to ease control flow. */
29774 res = gen_reg_rtx (mode);
29775 emit_move_insn (res, operand1);
29777 /* xa = abs (operand1) */
29778 xa = ix86_expand_sse_fabs (res, &mask);
29780 /* if (!isless (xa, TWO52)) goto label; */
29781 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29783 /* x = (double)(long)x */
29784 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29785 expand_fix (xi, res, 0);
29786 expand_float (res, xi, 0);
29788 if (HONOR_SIGNED_ZEROS (mode))
29789 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29791 emit_label (label);
29792 LABEL_NUSES (label) = 1;
29794 emit_move_insn (operand0, res);
29797 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
29800 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29802 enum machine_mode mode = GET_MODE (operand0);
29803 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29805 /* C code for SSE variant we expand below.
29806 double xa = fabs (x), x2;
29807 if (!isless (xa, TWO52)) return x;
29809 xa2 = xa + TWO52 - TWO52;
29810 Compensate: if (xa2 > xa) xa2 -= 1.0;
29813 x2 = copysign (xa2, x);
29814 return x2; */
29817 TWO52 = ix86_gen_TWO52 (mode);
29819 /* Temporary for holding the result, initialized to the input
29820 operand to ease control flow. */
29821 res = gen_reg_rtx (mode);
29822 emit_move_insn (res, operand1);
29824 /* xa = abs (operand1) */
29825 xa = ix86_expand_sse_fabs (res, &smask);
29827 /* if (!isless (xa, TWO52)) goto label; */
29828 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29830 /* res = xa + TWO52 - TWO52; */
29831 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29832 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29833 emit_move_insn (res, tmp);
29836 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29838 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29839 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29840 emit_insn (gen_rtx_SET (VOIDmode, mask,
29841 gen_rtx_AND (mode, mask, one)));
29842 tmp = expand_simple_binop (mode, MINUS,
29843 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29844 emit_move_insn (res, tmp);
29846 /* res = copysign (res, operand1) */
29847 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29849 emit_label (label);
29850 LABEL_NUSES (label) = 1;
29852 emit_move_insn (operand0, res);
29855 /* Expand SSE sequence for computing round from OPERAND1, storing into OPERAND0. */
29858 ix86_expand_round (rtx operand0, rtx operand1)
29860 /* C code for the stuff we're doing below:
29861 double xa = fabs (x);
29862 if (!isless (xa, TWO52)) return x;
29864 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29865 return copysign (xa, x); */
29867 enum machine_mode mode = GET_MODE (operand0);
29868 rtx res, TWO52, xa, label, xi, half, mask;
29869 const struct real_format *fmt;
29870 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29872 /* Temporary for holding the result, initialized to the input
29873 operand to ease control flow. */
29874 res = gen_reg_rtx (mode);
29875 emit_move_insn (res, operand1);
29877 TWO52 = ix86_gen_TWO52 (mode);
29878 xa = ix86_expand_sse_fabs (res, &mask);
29879 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29881 /* load nextafter (0.5, 0.0) */
29882 fmt = REAL_MODE_FORMAT (mode);
29883 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29884 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29886 /* xa = xa + 0.5 */
29887 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29888 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29890 /* xa = (double)(int64_t)xa */
29891 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29892 expand_fix (xi, xa, 0);
29893 expand_float (xa, xi, 0);
29895 /* res = copysign (xa, operand1) */
29896 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29898 emit_label (label);
29899 LABEL_NUSES (label) = 1;
29901 emit_move_insn (operand0, res);
29905 /* Table of valid machine attributes. */
29906 static const struct attribute_spec ix86_attribute_table[] =
29908 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29909 /* Stdcall attribute says callee is responsible for popping arguments
29910 if they are not variable. */
29911 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29912 /* Fastcall attribute says callee is responsible for popping arguments
29913 if they are not variable. */
29914 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29915 /* Thiscall attribute says callee is responsible for popping arguments
29916 if they are not variable. */
29917 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29918 /* Cdecl attribute says the callee is a normal C declaration */
29919 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29920 /* Regparm attribute specifies how many integer arguments are to be
29921 passed in registers. */
29922 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29923 /* Sseregparm attribute says we are using x86_64 calling conventions
29924 for FP arguments. */
29925 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29926 /* force_align_arg_pointer says this function realigns the stack at entry. */
29927 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29928 false, true, true, ix86_handle_cconv_attribute },
29929 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29930 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29931 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29932 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29934 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29935 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29936 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29937 SUBTARGET_ATTRIBUTE_TABLE,
29939 /* ms_abi and sysv_abi calling convention function attributes. */
29940 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29941 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29942 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29944 { NULL, 0, 0, false, false, false, NULL }
29947 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29948 static int
29949 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
29950 tree vectype ATTRIBUTE_UNUSED,
29951 int misalign ATTRIBUTE_UNUSED)
29952 {
29953 switch (type_of_cost)
29954 {
29955 case scalar_stmt:
29956 return ix86_cost->scalar_stmt_cost;
29958 case scalar_load:
29959 return ix86_cost->scalar_load_cost;
29961 case scalar_store:
29962 return ix86_cost->scalar_store_cost;
29964 case vector_stmt:
29965 return ix86_cost->vec_stmt_cost;
29967 case vector_load:
29968 return ix86_cost->vec_align_load_cost;
29970 case vector_store:
29971 return ix86_cost->vec_store_cost;
29973 case vec_to_scalar:
29974 return ix86_cost->vec_to_scalar_cost;
29976 case scalar_to_vec:
29977 return ix86_cost->scalar_to_vec_cost;
29979 case unaligned_load:
29980 case unaligned_store:
29981 return ix86_cost->vec_unalign_load_cost;
29983 case cond_branch_taken:
29984 return ix86_cost->cond_taken_branch_cost;
29986 case cond_branch_not_taken:
29987 return ix86_cost->cond_not_taken_branch_cost;
29989 case vec_perm:
29990 return 1;
29992 default:
29993 gcc_unreachable ();
29994 }
29995 }
29998 /* Implement targetm.vectorize.builtin_vec_perm. */
30000 static tree
30001 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30002 {
30003 tree itype = TREE_TYPE (vec_type);
30004 bool u = TYPE_UNSIGNED (itype);
30005 enum machine_mode vmode = TYPE_MODE (vec_type);
30006 enum ix86_builtins fcode;
30007 bool ok = TARGET_SSE2;
30009 switch (vmode)
30010 {
30011 case V4DFmode:
30012 ok = TARGET_AVX;
30013 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30014 goto do_di;
30015 case V2DFmode:
30016 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30017 do_di:
30018 itype = ix86_get_builtin_type (IX86_BT_DI);
30019 break;
30021 case V8SFmode:
30022 ok = TARGET_AVX;
30023 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30024 goto do_si;
30026 case V4SFmode:
30027 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30028 do_si:
30029 itype = ix86_get_builtin_type (IX86_BT_SI);
30030 break;
30032 case V2DImode:
30033 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30034 break;
30035 case V4SImode:
30036 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30037 break;
30038 case V8HImode:
30039 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30040 break;
30041 case V16QImode:
30042 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30043 break;
30044 default:
30045 ok = false;
30046 break;
30047 }
30049 if (!ok)
30050 return NULL_TREE;
30052 *mask_type = itype;
30053 return ix86_builtins[(int) fcode];
30054 }
30056 /* Return a vector mode with twice as many elements as VMODE. */
30057 /* ??? Consider moving this to a table generated by genmodes.c. */
30059 static enum machine_mode
30060 doublesize_vector_mode (enum machine_mode vmode)
30061 {
30062 switch (vmode)
30063 {
30064 case V2SFmode: return V4SFmode;
30065 case V1DImode: return V2DImode;
30066 case V2SImode: return V4SImode;
30067 case V4HImode: return V8HImode;
30068 case V8QImode: return V16QImode;
30070 case V2DFmode: return V4DFmode;
30071 case V4SFmode: return V8SFmode;
30072 case V2DImode: return V4DImode;
30073 case V4SImode: return V8SImode;
30074 case V8HImode: return V16HImode;
30075 case V16QImode: return V32QImode;
30077 case V4DFmode: return V8DFmode;
30078 case V8SFmode: return V16SFmode;
30079 case V4DImode: return V8DImode;
30080 case V8SImode: return V16SImode;
30081 case V16HImode: return V32HImode;
30082 case V32QImode: return V64QImode;
30084 default:
30085 gcc_unreachable ();
30086 }
30087 }
30089 /* Construct (set target (vec_select op0 (parallel perm))) and
30090 return true if that's a valid instruction in the active ISA. */
30092 static bool
30093 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30094 {
30095 rtx rperm[MAX_VECT_LEN], x;
30096 unsigned i;
30098 for (i = 0; i < nelt; ++i)
30099 rperm[i] = GEN_INT (perm[i]);
30101 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30102 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30103 x = gen_rtx_SET (VOIDmode, target, x);
30105 x = emit_insn (x);
30106 if (recog_memoized (x) < 0)
30107 {
30108 remove_insn (x);
30109 return false;
30110 }
30111 return true;
30112 }
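/* E.g. a call with nelt == 4 and perm {2,3,0,1} builds
   (set target (vec_select:V4SF op0 (parallel [2 3 0 1]))), which
   recog_memoized accepts only if some pattern (such as shufps or
   pshufd) can implement it in the active ISA.  */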
30114 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30116 static bool
30117 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30118 const unsigned char *perm, unsigned nelt)
30119 {
30120 enum machine_mode v2mode;
30121 rtx x;
30123 v2mode = doublesize_vector_mode (GET_MODE (op0));
30124 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30125 return expand_vselect (target, x, perm, nelt);
30126 }
30128 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30129 in terms of blendp[sd] / pblendw / pblendvb. */
30131 static bool
30132 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30133 {
30134 enum machine_mode vmode = d->vmode;
30135 unsigned i, mask, nelt = d->nelt;
30136 rtx target, op0, op1, x;
30138 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30139 return false;
30140 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30141 return false;
30143 /* This is a blend, not a permute. Elements must stay in their
30144 respective lanes. */
30145 for (i = 0; i < nelt; ++i)
30146 {
30147 unsigned e = d->perm[i];
30148 if (!(e == i || e == i + nelt))
30149 return false;
30150 }
30155 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30156 decision should be extracted elsewhere, so that we only try that
30157 sequence once all budget==3 options have been tried. */
30159 /* For bytes, see if bytes move in pairs so we can use pblendw with
30160 an immediate argument, rather than pblendvb with a vector argument. */
30161 if (vmode == V16QImode)
30162 {
30163 bool pblendw_ok = true;
30164 for (i = 0; i < 16 && pblendw_ok; i += 2)
30165 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30167 if (!pblendw_ok)
30168 {
30169 rtx rperm[16], vperm;
30171 for (i = 0; i < nelt; ++i)
30172 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30174 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30175 vperm = force_reg (V16QImode, vperm);
30177 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30178 return true;
30179 }
30180 }
30182 target = d->target;
30183 op0 = d->op0;
30184 op1 = d->op1;
30185 mask = 0;
30187 switch (vmode)
30188 {
30189 case V4DFmode:
30190 case V8SFmode:
30191 case V2DFmode:
30192 case V4SFmode:
30193 case V8HImode:
30194 for (i = 0; i < nelt; ++i)
30195 mask |= (d->perm[i] >= nelt) << i;
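/* E.g. a V8HImode blend taking the even elements from op0 and the odd
   ones from op1 has perm {0,9,2,11,4,13,6,15}; bits 1, 3, 5 and 7 get
   set, giving a pblendw immediate of 0xaa.  */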
30196 break;
30198 case V2DImode:
30199 for (i = 0; i < 2; ++i)
30200 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30201 goto do_subreg;
30203 case V4SImode:
30204 for (i = 0; i < 4; ++i)
30205 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30206 goto do_subreg;
30208 case V16QImode:
30209 for (i = 0; i < 8; ++i)
30210 mask |= (d->perm[i * 2] >= 16) << i;
30212 do_subreg:
30213 vmode = V8HImode;
30214 target = gen_lowpart (vmode, target);
30215 op0 = gen_lowpart (vmode, op0);
30216 op1 = gen_lowpart (vmode, op1);
30217 break;
30219 default:
30220 gcc_unreachable ();
30221 }
30223 /* This matches five different patterns with the different modes. */
30224 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30225 x = gen_rtx_SET (VOIDmode, target, x);
30226 emit_insn (x);
30228 return true;
30229 }
30231 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30232 in terms of the variable form of vpermilps.
30234 Note that we will have already failed the immediate input vpermilps,
30235 which requires that the high and low part shuffle be identical; the
30236 variable form doesn't require that. */
30238 static bool
30239 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30240 {
30241 rtx rperm[8], vperm;
30242 unsigned i;
30244 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30245 return false;
30247 /* We can only permute within the 128-bit lane. */
30248 for (i = 0; i < 8; ++i)
30249 {
30250 unsigned e = d->perm[i];
30251 if (i < 4 ? e >= 4 : e < 4)
30252 return false;
30253 }
30258 for (i = 0; i < 8; ++i)
30259 {
30260 unsigned e = d->perm[i];
30262 /* Within each 128-bit lane, the elements of op0 are numbered
30263 from 0 and the elements of op1 are numbered from 4. */
30269 rperm[i] = GEN_INT (e);
30270 }
30272 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30273 vperm = force_reg (V8SImode, vperm);
30274 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30275 return true;
30276 }
30279 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30280 in terms of pshufb or vpperm. */
30282 static bool
30283 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30284 {
30285 unsigned i, nelt, eltsz;
30286 rtx rperm[16], vperm, target, op0, op1;
30288 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30289 return false;
30290 if (GET_MODE_SIZE (d->vmode) != 16)
30291 return false;
30296 nelt = d->nelt;
30297 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30299 for (i = 0; i < nelt; ++i)
30300 {
30301 unsigned j, e = d->perm[i];
30302 for (j = 0; j < eltsz; ++j)
30303 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30304 }
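/* E.g. for V8HImode (eltsz == 2) permutation element e becomes the
   byte pair { 2*e, 2*e + 1 } in the pshufb control vector.  */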
30306 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30307 vperm = force_reg (V16QImode, vperm);
30309 target = gen_lowpart (V16QImode, d->target);
30310 op0 = gen_lowpart (V16QImode, d->op0);
30311 if (d->op0 == d->op1)
30312 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30313 else
30314 {
30315 op1 = gen_lowpart (V16QImode, d->op1);
30316 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30317 }
30319 return true;
30320 }
30322 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30323 in a single instruction. */
30325 static bool
30326 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30327 {
30328 unsigned i, nelt = d->nelt;
30329 unsigned char perm2[MAX_VECT_LEN];
30331 /* Check plain VEC_SELECT first, because AVX has instructions that could
30332 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30333 input where SEL+CONCAT may not. */
30334 if (d->op0 == d->op1)
30335 {
30336 int mask = nelt - 1;
30338 for (i = 0; i < nelt; i++)
30339 perm2[i] = d->perm[i] & mask;
30341 if (expand_vselect (d->target, d->op0, perm2, nelt))
30342 return true;
30344 /* There are plenty of patterns in sse.md that are written for
30345 SEL+CONCAT and are not replicated for a single op. Perhaps
30346 that should be changed, to avoid the nastiness here. */
30348 /* Recognize interleave style patterns, which means incrementing
30349 every other permutation operand. */
30350 for (i = 0; i < nelt; i += 2)
30351 {
30352 perm2[i] = d->perm[i] & mask;
30353 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30354 }
30355 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30356 return true;
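/* E.g. with nelt == 4, perm {0,0,1,1} becomes perm2 {0,4,1,5} on the
   doubled operand {op0,op0}, which is exactly unpcklps.  */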
30358 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30359 if (nelt >= 4)
30360 {
30361 for (i = 0; i < nelt; i += 4)
30362 {
30363 perm2[i + 0] = d->perm[i + 0] & mask;
30364 perm2[i + 1] = d->perm[i + 1] & mask;
30365 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30366 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30367 }
30369 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30370 return true;
30371 }
30372 }
30374 /* Finally, try the fully general two operand permute. */
30375 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30376 return true;
30378 /* Recognize interleave style patterns with reversed operands. */
30379 if (d->op0 != d->op1)
30380 {
30381 for (i = 0; i < nelt; ++i)
30382 {
30383 unsigned e = d->perm[i];
30384 if (e >= nelt)
30385 e -= nelt;
30386 else
30387 e += nelt;
30388 perm2[i] = e;
30389 }
30391 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30392 return true;
30393 }
30395 /* Try the SSE4.1 blend variable merge instructions. */
30396 if (expand_vec_perm_blend (d))
30397 return true;
30399 /* Try one of the AVX vpermil variable permutations. */
30400 if (expand_vec_perm_vpermil (d))
30401 return true;
30403 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30404 if (expand_vec_perm_pshufb (d))
30405 return true;
30407 return false;
30408 }
30410 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30411 in terms of a pair of pshuflw + pshufhw instructions. */
30413 static bool
30414 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30415 {
30416 unsigned char perm2[MAX_VECT_LEN];
30417 unsigned i;
30418 bool ok;
30420 if (d->vmode != V8HImode || d->op0 != d->op1)
30421 return false;
30423 /* The two permutations only operate in 64-bit lanes. */
30424 for (i = 0; i < 4; ++i)
30425 if (d->perm[i] >= 4)
30426 return false;
30427 for (i = 4; i < 8; ++i)
30428 if (d->perm[i] < 4)
30429 return false;
30434 /* Emit the pshuflw. */
30435 memcpy (perm2, d->perm, 4);
30436 for (i = 4; i < 8; ++i)
30437 perm2[i] = i;
30438 ok = expand_vselect (d->target, d->op0, perm2, 8);
30439 gcc_assert (ok);
30441 /* Emit the pshufhw. */
30442 memcpy (perm2 + 4, d->perm + 4, 4);
30443 for (i = 0; i < 4; ++i)
30444 perm2[i] = i;
30445 ok = expand_vselect (d->target, d->target, perm2, 8);
30446 gcc_assert (ok);
30448 return true;
30449 }
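/* E.g. perm {3,2,1,0,4,5,6,7} reverses the low quadword: the pshuflw
   immediate encodes the two-bit indexes 3,2,1,0 as 0x1b, and the
   pshufhw step is the identity selection 0xe4.  */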
30451 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30452 the permutation using the SSSE3 palignr instruction. This succeeds
30453 when all of the elements in PERM fit within one vector and we merely
30454 need to shift them down so that a single vector permutation has a
30455 chance to succeed. */
30457 static bool
30458 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30459 {
30460 unsigned i, nelt = d->nelt;
30461 unsigned min, max;
30462 bool in_order, ok;
30463 rtx shift;
30465 /* Even with AVX, palignr only operates on 128-bit vectors. */
30466 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30467 return false;
30469 min = nelt, max = 0;
30470 for (i = 0; i < nelt; ++i)
30471 {
30472 unsigned e = d->perm[i];
30473 if (e < min)
30474 min = e;
30475 if (e > max)
30476 max = e;
30477 }
30478 if (min == 0 || max - min >= nelt)
30479 return false;
30481 /* Given that we have SSSE3, we know we'll be able to implement the
30482 single operand permutation after the palignr with pshufb. */
30486 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30487 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30488 gen_lowpart (TImode, d->op1),
30489 gen_lowpart (TImode, d->op0), shift));
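/* E.g. for V8HImode with min == 2, the concatenation op1:op0 is shifted
   right by 32 bits, so the old element 2 lands in element 0 and the
   remaining single-operand shuffle needs only indexes 0..nelt-1.  */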
30491 d->op0 = d->op1 = d->target;
30493 in_order = true;
30494 for (i = 0; i < nelt; ++i)
30495 {
30496 unsigned e = d->perm[i] - min;
30497 if (e != i)
30498 in_order = false;
30499 d->perm[i] = e;
30500 }
30502 /* Test for the degenerate case where the alignment by itself
30503 produces the desired permutation. */
30504 if (in_order)
30505 return true;
30507 ok = expand_vec_perm_1 (d);
30508 gcc_assert (ok);
30510 return true;
30511 }
30513 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30514 a two vector permutation into a single vector permutation by using
30515 an interleave operation to merge the vectors. */
30517 static bool
30518 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30519 {
30520 struct expand_vec_perm_d dremap, dfinal;
30521 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30522 unsigned contents, h1, h2, h3, h4;
30523 unsigned char remap[2 * MAX_VECT_LEN];
30524 rtx seq;
30525 bool ok;
30527 if (d->op0 == d->op1)
30528 return false;
30530 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30531 lanes. We can use similar techniques with the vperm2f128 instruction,
30532 but it requires slightly different logic. */
30533 if (GET_MODE_SIZE (d->vmode) != 16)
30534 return false;
30536 /* Examine from whence the elements come. */
30537 contents = 0;
30538 for (i = 0; i < nelt; ++i)
30539 contents |= 1u << d->perm[i];
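/* E.g. for V4SFmode and perm {0,1,4,5}, contents is 0x33: every element
   comes from the low half of one of the inputs (h1 | h3 below), so a
   single interleave-low gathers all needed elements into one vector.  */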
30541 /* Split the two input vectors into 4 halves. */
30542 h1 = (1u << nelt2) - 1;
30543 h2 = h1 << nelt2;
30544 h3 = h2 << nelt2;
30545 h4 = h3 << nelt2;
30547 memset (remap, 0xff, sizeof (remap));
30548 dremap = *d;
30550 /* If the elements from the low halves use interleave low, and similarly
30551 for interleave high. If the elements are from mis-matched halves, we
30552 can use shufps for V4SF/V4SI or do a DImode shuffle. */
30553 if ((contents & (h1 | h3)) == contents)
30554 {
30555 for (i = 0; i < nelt2; ++i)
30556 {
30557 remap[i] = i * 2;
30558 remap[i + nelt] = i * 2 + 1;
30559 dremap.perm[i * 2] = i;
30560 dremap.perm[i * 2 + 1] = i + nelt;
30561 }
30562 }
30563 else if ((contents & (h2 | h4)) == contents)
30564 {
30565 for (i = 0; i < nelt2; ++i)
30566 {
30567 remap[i + nelt2] = i * 2;
30568 remap[i + nelt + nelt2] = i * 2 + 1;
30569 dremap.perm[i * 2] = i + nelt2;
30570 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30571 }
30572 }
30573 else if ((contents & (h1 | h4)) == contents)
30574 {
30575 for (i = 0; i < nelt2; ++i)
30576 {
30577 remap[i] = i;
30578 remap[i + nelt + nelt2] = i + nelt2;
30579 dremap.perm[i] = i;
30580 dremap.perm[i + nelt2] = i + nelt + nelt2;
30581 }
30582 if (nelt != 4)
30583 {
30584 dremap.vmode = V2DImode;
30585 dremap.nelt = 2;
30586 dremap.perm[0] = 0;
30587 dremap.perm[1] = 3;
30588 }
30589 }
30590 else if ((contents & (h2 | h3)) == contents)
30591 {
30592 for (i = 0; i < nelt2; ++i)
30593 {
30594 remap[i + nelt2] = i;
30595 remap[i + nelt] = i + nelt2;
30596 dremap.perm[i] = i + nelt2;
30597 dremap.perm[i + nelt2] = i + nelt;
30598 }
30599 if (nelt != 4)
30600 {
30601 dremap.vmode = V2DImode;
30602 dremap.nelt = 2;
30603 dremap.perm[0] = 1;
30604 dremap.perm[1] = 2;
30605 }
30606 }
30607 else
30608 return false;
30610 /* Use the remapping array set up above to move the elements from their
30611 swizzled locations into their final destinations. */
30612 dfinal = *d;
30613 for (i = 0; i < nelt; ++i)
30614 {
30615 unsigned e = remap[d->perm[i]];
30616 gcc_assert (e < nelt);
30617 dfinal.perm[i] = e;
30618 }
30619 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30620 dfinal.op1 = dfinal.op0;
30621 dremap.target = dfinal.op0;
30623 /* Test if the final remap can be done with a single insn. For V4SFmode or
30624 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30625 start_sequence ();
30626 ok = expand_vec_perm_1 (&dfinal);
30627 seq = get_insns ();
30628 end_sequence ();
30630 if (!ok)
30631 return false;
30633 if (dremap.vmode != dfinal.vmode)
30634 {
30635 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30636 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30637 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30638 }
30640 ok = expand_vec_perm_1 (&dremap);
30641 gcc_assert (ok);
30643 emit_insn (seq);
30644 return true;
30645 }
30647 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30648 permutation with two pshufb insns and an ior. We should have already
30649 failed all two instruction sequences. */
30651 static bool
30652 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30653 {
30654 rtx rperm[2][16], vperm, l, h, op, m128;
30655 unsigned int i, nelt, eltsz;
30657 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30658 return false;
30659 gcc_assert (d->op0 != d->op1);
30661 nelt = d->nelt;
30662 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30664 /* Generate two permutation masks. If the required element is within
30665 the given vector it is shuffled into the proper lane. If the required
30666 element is in the other vector, force a zero into the lane by setting
30667 bit 7 in the permutation mask. */
30668 m128 = GEN_INT (-128);
30669 for (i = 0; i < nelt; ++i)
30670 {
30671 unsigned j, e = d->perm[i];
30672 unsigned which = (e >= nelt);
30673 if (which)
30674 e -= nelt;
30676 for (j = 0; j < eltsz; ++j)
30677 {
30678 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30679 rperm[1-which][i*eltsz + j] = m128;
30680 }
30681 }
30683 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30684 vperm = force_reg (V16QImode, vperm);
30686 l = gen_reg_rtx (V16QImode);
30687 op = gen_lowpart (V16QImode, d->op0);
30688 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30690 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30691 vperm = force_reg (V16QImode, vperm);
30693 h = gen_reg_rtx (V16QImode);
30694 op = gen_lowpart (V16QImode, d->op1);
30695 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30697 op = gen_lowpart (V16QImode, d->target);
30698 emit_insn (gen_iorv16qi3 (op, l, h));
30700 return true;
30701 }
30703 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30704 and extract-odd permutations. */
30706 static bool
30707 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30708 {
30709 rtx t1, t2, t3, t4;
30711 switch (d->vmode)
30712 {
30713 case V4DFmode:
30714 t1 = gen_reg_rtx (V4DFmode);
30715 t2 = gen_reg_rtx (V4DFmode);
30717 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30718 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30719 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
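/* E.g. unpcklpd on { 0 1 4 5 } and { 2 3 6 7 } takes the low double of
   each 128-bit lane from both inputs and yields the even elements
   { 0 2 4 6 }; unpckhpd likewise yields the odd ones { 1 3 5 7 }.  */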
30721 /* Now an unpck[lh]pd will produce the result required. */
30722 if (odd)
30723 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30724 else
30725 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30726 emit_insn (t3);
30727 break;
30729 case V8SFmode:
30730 {
30731 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30732 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30733 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30735 t1 = gen_reg_rtx (V8SFmode);
30736 t2 = gen_reg_rtx (V8SFmode);
30737 t3 = gen_reg_rtx (V8SFmode);
30738 t4 = gen_reg_rtx (V8SFmode);
30740 /* Shuffle within the 128-bit lanes to produce:
30741 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30742 expand_vselect (t1, d->op0, perm1, 8);
30743 expand_vselect (t2, d->op1, perm1, 8);
30745 /* Shuffle the lanes around to produce:
30746 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30747 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30748 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30750 /* Now a vpermil2p will produce the result required. */
30751 /* ??? The vpermil2p requires a vector constant. Another option
30752 is a unpck[lh]ps to merge the two vectors to produce
30753 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30754 vpermilps to get the elements into the final order. */
30757 memcpy (d->perm, odd ? permo : perme, 8);
30758 expand_vec_perm_vpermil (d);
30759 }
30760 break;
30762 case V2DFmode:
30763 case V4SFmode:
30764 case V2DImode:
30765 case V4SImode:
30766 /* These are always directly implementable by expand_vec_perm_1. */
30767 gcc_unreachable ();
30769 case V8HImode:
30770 if (TARGET_SSSE3)
30771 return expand_vec_perm_pshufb2 (d);
30772 else
30773 {
30774 /* We need 2*log2(N)-1 operations to achieve odd/even
30775 with interleave. */
30776 t1 = gen_reg_rtx (V8HImode);
30777 t2 = gen_reg_rtx (V8HImode);
30778 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30779 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30780 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30781 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30782 if (odd)
30783 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30784 else
30785 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30786 emit_insn (t3);
30787 }
30788 break;
30790 case V16QImode:
30791 if (TARGET_SSSE3)
30792 return expand_vec_perm_pshufb2 (d);
30793 else
30794 {
30795 t1 = gen_reg_rtx (V16QImode);
30796 t2 = gen_reg_rtx (V16QImode);
30797 t3 = gen_reg_rtx (V16QImode);
30798 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30799 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30800 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30801 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30802 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30803 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30804 if (odd)
30805 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30806 else
30807 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30808 emit_insn (t3);
30809 }
30810 break;
30812 default:
30813 gcc_unreachable ();
30814 }
30816 return true;
30817 }
30819 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30820 extract-even and extract-odd permutations. */
30822 static bool
30823 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30824 {
30825 unsigned i, odd, nelt = d->nelt;
30827 odd = d->perm[0];
30828 if (odd != 0 && odd != 1)
30829 return false;
30831 for (i = 1; i < nelt; ++i)
30832 if (d->perm[i] != 2 * i + odd)
30833 return false;
30835 return expand_vec_perm_even_odd_1 (d, odd);
30836 }
30838 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
30839 permutations. We assume that expand_vec_perm_1 has already failed. */
30841 static bool
30842 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
30843 {
30844 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
30845 enum machine_mode vmode = d->vmode;
30846 unsigned char perm2[4];
30847 rtx op0 = d->op0;
30848 bool ok;
30850 switch (vmode)
30851 {
30852 case V4DFmode:
30853 case V8SFmode:
30854 /* These are special-cased in sse.md so that we can optionally
30855 use the vbroadcast instruction. They expand to two insns
30856 if the input happens to be in a register. */
30857 gcc_unreachable ();
30859 case V2DFmode:
30860 case V2DImode:
30861 case V4SFmode:
30862 case V4SImode:
30863 /* These are always implementable using standard shuffle patterns. */
30864 gcc_unreachable ();
30866 case V8HImode:
30867 case V16QImode:
30868 /* These can be implemented via interleave. We save one insn by
30869 stopping once we have promoted to V4SImode and then use pshufd. */
30870 do
30871 {
30872 optab otab = vec_interleave_low_optab;
30874 if (elt >= nelt2)
30875 {
30876 otab = vec_interleave_high_optab;
30877 elt -= nelt2;
30878 }
30879 nelt2 /= 2;
30881 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
30882 vmode = get_mode_wider_vector (vmode);
30883 op0 = gen_lowpart (vmode, op0);
30884 }
30885 while (vmode != V4SImode);
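/* E.g. broadcasting byte 5 of a V16QImode input: punpcklbw widens it to
   word 5, punpckhwd moves it to dword 1, and the pshufd below with
   control {1,1,1,1} replicates that dword across the vector.  */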
30887 memset (perm2, elt, 4);
30888 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
30889 gcc_assert (ok);
30890 return true;
30892 default:
30893 gcc_unreachable ();
30894 }
30895 }
30897 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30898 broadcast permutations. */
30900 static bool
30901 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30902 {
30903 unsigned i, elt, nelt = d->nelt;
30905 if (d->op0 != d->op1)
30906 return false;
30908 elt = d->perm[0];
30909 for (i = 1; i < nelt; ++i)
30910 if (d->perm[i] != elt)
30911 return false;
30913 return expand_vec_perm_broadcast_1 (d);
30914 }
30916 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30917 With all of the interface bits taken care of, perform the expansion
30918 in D and return true on success. */
30920 static bool
30921 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30922 {
30923 /* Try a single instruction expansion. */
30924 if (expand_vec_perm_1 (d))
30925 return true;
30927 /* Try sequences of two instructions. */
30929 if (expand_vec_perm_pshuflw_pshufhw (d))
30930 return true;
30932 if (expand_vec_perm_palignr (d))
30933 return true;
30935 if (expand_vec_perm_interleave2 (d))
30936 return true;
30938 if (expand_vec_perm_broadcast (d))
30939 return true;
30941 /* Try sequences of three instructions. */
30943 if (expand_vec_perm_pshufb2 (d))
30944 return true;
30946 /* ??? Look for narrow permutations whose element orderings would
30947 allow the promotion to a wider mode. */
30949 /* ??? Look for sequences of interleave or a wider permute that place
30950 the data into the correct lanes for a half-vector shuffle like
30951 pshuf[lh]w or vpermilps. */
30953 /* ??? Look for sequences of interleave that produce the desired results.
30954 The combinatorics of punpck[lh] get pretty ugly... */
30956 if (expand_vec_perm_even_odd (d))
30957 return true;
30959 return false;
30960 }
30962 /* Extract the values from the vector CST into the permutation array in D.
30963 Return 0 on error, 1 if all values from the permutation come from the
30964 first vector, 2 if all values from the second vector, and 3 otherwise. */
30966 static int
30967 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
30968 {
30969 tree list = TREE_VECTOR_CST_ELTS (cst);
30970 unsigned i, nelt = d->nelt;
30971 int ret = 0;
30973 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
30974 {
30975 unsigned HOST_WIDE_INT e;
30977 if (!host_integerp (TREE_VALUE (list), 1))
30978 return 0;
30979 e = tree_low_cst (TREE_VALUE (list), 1);
30980 if (e >= 2 * nelt)
30981 return 0;
30983 ret |= (e < nelt ? 1 : 2);
30984 d->perm[i] = e;
30985 }
30986 gcc_assert (list == NULL);
30988 /* For all elements from second vector, fold the elements to first. */
30989 if (ret == 2)
30990 for (i = 0; i < nelt; ++i)
30991 d->perm[i] -= nelt;
30993 return ret;
30994 }
30996 rtx
30997 ix86_expand_vec_perm_builtin (tree exp)
30998 {
30999 struct expand_vec_perm_d d;
31000 tree arg0, arg1, arg2;
31002 arg0 = CALL_EXPR_ARG (exp, 0);
31003 arg1 = CALL_EXPR_ARG (exp, 1);
31004 arg2 = CALL_EXPR_ARG (exp, 2);
31006 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31007 d.nelt = GET_MODE_NUNITS (d.vmode);
31008 d.testing_p = false;
31009 gcc_assert (VECTOR_MODE_P (d.vmode));
31011 if (TREE_CODE (arg2) != VECTOR_CST)
31012 {
31013 error_at (EXPR_LOCATION (exp),
31014 "vector permutation requires vector constant");
31015 goto exit_error;
31016 }
31018 switch (extract_vec_perm_cst (&d, arg2))
31019 {
31020 default:
31021 gcc_unreachable ();
31023 case 0:
31024 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31025 goto exit_error;
31027 case 3:
31028 if (!operand_equal_p (arg0, arg1, 0))
31029 {
31030 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31031 d.op0 = force_reg (d.vmode, d.op0);
31032 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31033 d.op1 = force_reg (d.vmode, d.op1);
31034 break;
31035 }
31037 /* The elements of PERM do not suggest that only the first operand
31038 is used, but both operands are identical. Allow easier matching
31039 of the permutation by folding the permutation into the single
31040 input vector. */
31041 {
31042 unsigned i, nelt = d.nelt;
31043 for (i = 0; i < nelt; ++i)
31044 if (d.perm[i] >= nelt)
31045 d.perm[i] -= nelt;
31046 }
31047 /* FALLTHRU */
31049 case 1:
31050 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31051 d.op0 = force_reg (d.vmode, d.op0);
31052 d.op1 = d.op0;
31053 break;
31055 case 2:
31056 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31057 d.op0 = force_reg (d.vmode, d.op0);
31058 d.op1 = d.op0;
31059 break;
31060 }
31062 d.target = gen_reg_rtx (d.vmode);
31063 if (ix86_expand_vec_perm_builtin_1 (&d))
31064 return d.target;
31066 /* For compiler generated permutations, we should never get here, because
31067 the compiler should also be checking the ok hook. But since this is a
31068 builtin the user has access to, don't abort. */
31069 switch (d.nelt)
31070 {
31071 case 2:
31072 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31073 break;
31074 case 4:
31075 sorry ("vector permutation (%d %d %d %d)",
31076 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31077 break;
31078 case 8:
31079 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31080 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31081 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31082 break;
31083 case 16:
31084 sorry ("vector permutation "
31085 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31086 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31087 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31088 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31089 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31090 break;
31091 default:
31092 gcc_unreachable ();
31093 }
31094 exit_error:
31095 return CONST0_RTX (d.vmode);
31096 }
31098 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31100 static bool
31101 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31102 {
31103 struct expand_vec_perm_d d;
31104 int vec_mask;
31105 bool ret, one_vec;
31107 d.vmode = TYPE_MODE (vec_type);
31108 d.nelt = GET_MODE_NUNITS (d.vmode);
31109 d.testing_p = true;
31111 /* Given sufficient ISA support we can just return true here
31112 for selected vector modes. */
31113 if (GET_MODE_SIZE (d.vmode) == 16)
31114 {
31115 /* All implementable with a single vpperm insn. */
31116 if (TARGET_XOP)
31117 return true;
31118 /* All implementable with 2 pshufb + 1 ior. */
31119 if (TARGET_SSSE3)
31120 return true;
31121 /* All implementable with shufpd or unpck[lh]pd. */
31122 if (d.nelt == 2)
31123 return true;
31124 }
31126 vec_mask = extract_vec_perm_cst (&d, mask);
31128 /* This hook cannot be called in response to something that the
31129 user does (unlike the builtin expander) so we shouldn't ever see
31130 an error generated from the extract. */
31131 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31132 one_vec = (vec_mask != 3);
31134 /* Implementable with shufps or pshufd. */
31135 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31136 return true;
31138 /* Otherwise we have to go through the motions and see if we can
31139 figure out how to generate the requested permutation. */
31140 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31141 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31142 if (!one_vec)
31143 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31145 start_sequence ();
31146 ret = ix86_expand_vec_perm_builtin_1 (&d);
31147 end_sequence ();
31149 return ret;
31150 }
31152 void
31153 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31154 {
31155 struct expand_vec_perm_d d;
31156 unsigned i, nelt;
31158 d.target = targ;
31159 d.op0 = op0;
31160 d.op1 = op1;
31161 d.vmode = GET_MODE (targ);
31162 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31163 d.testing_p = false;
31165 for (i = 0; i < nelt; ++i)
31166 d.perm[i] = i * 2 + odd;
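/* E.g. for V4SImode with odd == 0 this requests { 0 2 4 6 }, the
   even-numbered elements of the concatenation of op0 and op1.  */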
31168 /* We'll either be able to implement the permutation directly... */
31169 if (expand_vec_perm_1 (&d))
31170 return;
31172 /* ... or we use the special-case patterns. */
31173 expand_vec_perm_even_odd_1 (&d, odd);
31174 }
31176 /* This function returns the calling abi specific va_list type node.
31177 It returns the FNDECL specific va_list type. */
31179 static tree
31180 ix86_fn_abi_va_list (tree fndecl)
31181 {
31182 if (!TARGET_64BIT)
31183 return va_list_type_node;
31184 gcc_assert (fndecl != NULL_TREE);
31186 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31187 return ms_va_list_type_node;
31189 return sysv_va_list_type_node;
31190 }
31192 /* Returns the canonical va_list type specified by TYPE. If there
31193 is no valid TYPE provided, it returns NULL_TREE. */
31195 static tree
31196 ix86_canonical_va_list_type (tree type)
31197 {
31198 tree wtype, htype;
31200 /* Resolve references and pointers to va_list type. */
31201 if (TREE_CODE (type) == MEM_REF)
31202 type = TREE_TYPE (type);
31203 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31204 type = TREE_TYPE (type);
31205 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31206 type = TREE_TYPE (type);
31208 if (TARGET_64BIT)
31209 {
31210 wtype = va_list_type_node;
31211 gcc_assert (wtype != NULL_TREE);
31212 htype = type;
31213 if (TREE_CODE (wtype) == ARRAY_TYPE)
31214 {
31215 /* If va_list is an array type, the argument may have decayed
31216 to a pointer type, e.g. by being passed to another function.
31217 In that case, unwrap both types so that we can compare the
31218 underlying records. */
31219 if (TREE_CODE (htype) == ARRAY_TYPE
31220 || POINTER_TYPE_P (htype))
31221 {
31222 wtype = TREE_TYPE (wtype);
31223 htype = TREE_TYPE (htype);
31224 }
31225 }
31226 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31227 return va_list_type_node;
31228 wtype = sysv_va_list_type_node;
31229 gcc_assert (wtype != NULL_TREE);
31230 htype = type;
31231 if (TREE_CODE (wtype) == ARRAY_TYPE)
31232 {
31233 /* If va_list is an array type, the argument may have decayed
31234 to a pointer type, e.g. by being passed to another function.
31235 In that case, unwrap both types so that we can compare the
31236 underlying records. */
31237 if (TREE_CODE (htype) == ARRAY_TYPE
31238 || POINTER_TYPE_P (htype))
31239 {
31240 wtype = TREE_TYPE (wtype);
31241 htype = TREE_TYPE (htype);
31242 }
31243 }
31244 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31245 return sysv_va_list_type_node;
31246 wtype = ms_va_list_type_node;
31247 gcc_assert (wtype != NULL_TREE);
31248 htype = type;
31249 if (TREE_CODE (wtype) == ARRAY_TYPE)
31250 {
31251 /* If va_list is an array type, the argument may have decayed
31252 to a pointer type, e.g. by being passed to another function.
31253 In that case, unwrap both types so that we can compare the
31254 underlying records. */
31255 if (TREE_CODE (htype) == ARRAY_TYPE
31256 || POINTER_TYPE_P (htype))
31257 {
31258 wtype = TREE_TYPE (wtype);
31259 htype = TREE_TYPE (htype);
31260 }
31261 }
31262 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31263 return ms_va_list_type_node;
31264 }
31266 return std_canonical_va_list_type (type);
31267 }
31269 /* Iterate through the target-specific builtin types for va_list.
31270 IDX denotes the iterator, *PTREE is set to the result type of
31271 the va_list builtin, and *PNAME to its internal type.
31272 Returns zero if there is no element for this index, otherwise
31273 IDX should be increased upon the next call.
31274 Note, do not iterate a base builtin's name like __builtin_va_list.
31275 Used from c_common_nodes_and_builtins. */
31277 static int
31278 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31279 {
31280 if (!TARGET_64BIT)
31281 return 0;
31283 switch (idx)
31284 {
31285 default:
31286 break;
31287 case 0:
31288 *ptree = ms_va_list_type_node;
31289 *pname = "__builtin_ms_va_list";
31290 return 1;
31292 case 1:
31293 *ptree = sysv_va_list_type_node;
31294 *pname = "__builtin_sysv_va_list";
31295 return 1;
31296 }
31298 return 0;
31299 }
31302 /* Initialize the GCC target structure. */
31303 #undef TARGET_RETURN_IN_MEMORY
31304 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31306 #undef TARGET_LEGITIMIZE_ADDRESS
31307 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31309 #undef TARGET_ATTRIBUTE_TABLE
31310 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31311 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31312 # undef TARGET_MERGE_DECL_ATTRIBUTES
31313 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31314 #endif
31316 #undef TARGET_COMP_TYPE_ATTRIBUTES
31317 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31319 #undef TARGET_INIT_BUILTINS
31320 #define TARGET_INIT_BUILTINS ix86_init_builtins
31321 #undef TARGET_BUILTIN_DECL
31322 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31323 #undef TARGET_EXPAND_BUILTIN
31324 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31326 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31327 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31328 ix86_builtin_vectorized_function
31330 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31331 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31333 #undef TARGET_BUILTIN_RECIPROCAL
31334 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31336 #undef TARGET_ASM_FUNCTION_EPILOGUE
31337 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31339 #undef TARGET_ENCODE_SECTION_INFO
31340 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31341 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31342 #else
31343 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31344 #endif
31346 #undef TARGET_ASM_OPEN_PAREN
31347 #define TARGET_ASM_OPEN_PAREN ""
31348 #undef TARGET_ASM_CLOSE_PAREN
31349 #define TARGET_ASM_CLOSE_PAREN ""
31351 #undef TARGET_ASM_BYTE_OP
31352 #define TARGET_ASM_BYTE_OP ASM_BYTE
31354 #undef TARGET_ASM_ALIGNED_HI_OP
31355 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31356 #undef TARGET_ASM_ALIGNED_SI_OP
31357 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31358 #ifdef ASM_QUAD
31359 #undef TARGET_ASM_ALIGNED_DI_OP
31360 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31361 #endif
31363 #undef TARGET_ASM_UNALIGNED_HI_OP
31364 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31365 #undef TARGET_ASM_UNALIGNED_SI_OP
31366 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31367 #undef TARGET_ASM_UNALIGNED_DI_OP
31368 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31370 #undef TARGET_PRINT_OPERAND
31371 #define TARGET_PRINT_OPERAND ix86_print_operand
31372 #undef TARGET_PRINT_OPERAND_ADDRESS
31373 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31374 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31375 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31377 #undef TARGET_SCHED_ADJUST_COST
31378 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31379 #undef TARGET_SCHED_ISSUE_RATE
31380 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31383 ia32_multipass_dfa_lookahead
31385 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31386 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31388 #ifdef HAVE_AS_TLS
31389 #undef TARGET_HAVE_TLS
31390 #define TARGET_HAVE_TLS true
31391 #endif
31392 #undef TARGET_CANNOT_FORCE_CONST_MEM
31393 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31394 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31395 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31397 #undef TARGET_DELEGITIMIZE_ADDRESS
31398 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31400 #undef TARGET_MS_BITFIELD_LAYOUT_P
31401 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31403 #if TARGET_MACHO
31404 #undef TARGET_BINDS_LOCAL_P
31405 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31406 #endif
31407 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31408 #undef TARGET_BINDS_LOCAL_P
31409 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31410 #endif
31412 #undef TARGET_ASM_OUTPUT_MI_THUNK
31413 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31414 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31415 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31417 #undef TARGET_ASM_FILE_START
31418 #define TARGET_ASM_FILE_START x86_file_start
31420 #undef TARGET_DEFAULT_TARGET_FLAGS
31421 #define TARGET_DEFAULT_TARGET_FLAGS \
31422 (TARGET_DEFAULT \
31423 | TARGET_SUBTARGET_DEFAULT \
31424 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
31427 #undef TARGET_HANDLE_OPTION
31428 #define TARGET_HANDLE_OPTION ix86_handle_option
31430 #undef TARGET_REGISTER_MOVE_COST
31431 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31432 #undef TARGET_MEMORY_MOVE_COST
31433 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31434 #undef TARGET_RTX_COSTS
31435 #define TARGET_RTX_COSTS ix86_rtx_costs
31436 #undef TARGET_ADDRESS_COST
31437 #define TARGET_ADDRESS_COST ix86_address_cost
31439 #undef TARGET_FIXED_CONDITION_CODE_REGS
31440 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31441 #undef TARGET_CC_MODES_COMPATIBLE
31442 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31444 #undef TARGET_MACHINE_DEPENDENT_REORG
31445 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31447 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31448 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31450 #undef TARGET_BUILD_BUILTIN_VA_LIST
31451 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31453 #undef TARGET_ENUM_VA_LIST_P
31454 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31456 #undef TARGET_FN_ABI_VA_LIST
31457 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31459 #undef TARGET_CANONICAL_VA_LIST_TYPE
31460 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31462 #undef TARGET_EXPAND_BUILTIN_VA_START
31463 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31465 #undef TARGET_MD_ASM_CLOBBERS
31466 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31468 #undef TARGET_PROMOTE_PROTOTYPES
31469 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31470 #undef TARGET_STRUCT_VALUE_RTX
31471 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31472 #undef TARGET_SETUP_INCOMING_VARARGS
31473 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31474 #undef TARGET_MUST_PASS_IN_STACK
31475 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31476 #undef TARGET_FUNCTION_ARG_ADVANCE
31477 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31478 #undef TARGET_FUNCTION_ARG
31479 #define TARGET_FUNCTION_ARG ix86_function_arg
31480 #undef TARGET_PASS_BY_REFERENCE
31481 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31482 #undef TARGET_INTERNAL_ARG_POINTER
31483 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31484 #undef TARGET_UPDATE_STACK_BOUNDARY
31485 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31486 #undef TARGET_GET_DRAP_RTX
31487 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31488 #undef TARGET_STRICT_ARGUMENT_NAMING
31489 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31490 #undef TARGET_STATIC_CHAIN
31491 #define TARGET_STATIC_CHAIN ix86_static_chain
31492 #undef TARGET_TRAMPOLINE_INIT
31493 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31494 #undef TARGET_RETURN_POPS_ARGS
31495 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31497 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31498 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31500 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31501 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31503 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31504 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31506 #undef TARGET_C_MODE_FOR_SUFFIX
31507 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31509 #ifdef HAVE_AS_TLS
31510 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31511 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31512 #endif
31514 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31515 #undef TARGET_INSERT_ATTRIBUTES
31516 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31517 #endif
31519 #undef TARGET_MANGLE_TYPE
31520 #define TARGET_MANGLE_TYPE ix86_mangle_type
31522 #undef TARGET_STACK_PROTECT_FAIL
31523 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31525 #undef TARGET_FUNCTION_VALUE
31526 #define TARGET_FUNCTION_VALUE ix86_function_value
31528 #undef TARGET_FUNCTION_VALUE_REGNO_P
31529 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31531 #undef TARGET_SECONDARY_RELOAD
31532 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31534 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31535 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31536 ix86_builtin_vectorization_cost
31537 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31538 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31539 ix86_vectorize_builtin_vec_perm
31540 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31541 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31542 ix86_vectorize_builtin_vec_perm_ok
31544 #undef TARGET_SET_CURRENT_FUNCTION
31545 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31547 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31548 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31550 #undef TARGET_OPTION_SAVE
31551 #define TARGET_OPTION_SAVE ix86_function_specific_save
31553 #undef TARGET_OPTION_RESTORE
31554 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31556 #undef TARGET_OPTION_PRINT
31557 #define TARGET_OPTION_PRINT ix86_function_specific_print
31559 #undef TARGET_CAN_INLINE_P
31560 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31562 #undef TARGET_EXPAND_TO_RTL_HOOK
31563 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31565 #undef TARGET_LEGITIMATE_ADDRESS_P
31566 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31568 #undef TARGET_IRA_COVER_CLASSES
31569 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31571 #undef TARGET_FRAME_POINTER_REQUIRED
31572 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31574 #undef TARGET_CAN_ELIMINATE
31575 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31577 #undef TARGET_ASM_CODE_END
31578 #define TARGET_ASM_CODE_END ix86_code_end
31580 struct gcc_target targetm = TARGET_INITIALIZER;
31582 #include "gt-i386.h"