/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
#include "sched-int.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
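
/* Illustrative sketch (an addition, not part of the original file): the
   index produced by MODE_INDEX selects the per-mode row of the mult_init[]
   and divide[] arrays in the cost tables below.  The helper is hypothetical
   and assumes the processor_costs field names declared in i386.h.  */
static inline int
example_mult_start_cost (const struct processor_costs *cost,
			 enum machine_mode mode)
{
  /* Row 4 ("other") catches any mode that is not QI/HI/SI/DI.  */
  return cost->mult_init[MODE_INDEX (mode)];
}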
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
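
/* Worked example (an addition): with COSTS_N_INSNS (N) == (N) * 4 and an
   add taking 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a
   2-byte instruction in the size tables weighs the same as one add in the
   speed tables, keeping the two cost scales directly comparable.  */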
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
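
/* Illustrative note (an addition; it assumes the stringop_algs layout
   declared in i386.h): each memcpy/memset entry in the cost tables below
   is initialized as

     {unknown_size_alg, {{max1, alg1}, ..., {-1, fallback_alg}}}

   where the first member picks the algorithm when the block size is not a
   compile-time constant, each {max, alg} pair handles known sizes up to
   max bytes, and max == -1 terminates the list as the catch-all.  Read
   this way, DUMMY_STRINGOP_ALGS is a placeholder that always falls back
   to a library call.  */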
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),		/* cost of an add instruction */
  COSTS_N_BYTES (3),		/* cost of a lea instruction */
  COSTS_N_BYTES (2),		/* variable shift costs */
  COSTS_N_BYTES (3),		/* constant shift costs */
  {COSTS_N_BYTES (3),		/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),		/*				 HI */
   COSTS_N_BYTES (3),		/*				 SI */
   COSTS_N_BYTES (3),		/*				 DI */
   COSTS_N_BYTES (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),		/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),		/*			    HI */
   COSTS_N_BYTES (3),		/*			    SI */
   COSTS_N_BYTES (3),		/*			    DI */
   COSTS_N_BYTES (5)},		/*			 other */
  COSTS_N_BYTES (3),		/* cost of movsx */
  COSTS_N_BYTES (3),		/* cost of movzx */
  2,				/* cost for loading QImode using movzbl */
  {2, 2, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 2},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {2, 2, 2},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  3,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {3, 3},			/* cost of storing MMX registers
				   in SImode and DImode */
  3,				/* cost of moving SSE register */
  {3, 3, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {3, 3, 3},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_BYTES (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),		/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),		/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),		/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  1,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  1,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (6),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),		/*				 HI */
   COSTS_N_INSNS (6),		/*				 SI */
   COSTS_N_INSNS (6),		/*				 DI */
   COSTS_N_INSNS (6)},		/*			      other */
  COSTS_N_INSNS (1),		/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (23),		/*			    SI */
   COSTS_N_INSNS (23),		/*			    DI */
   COSTS_N_INSNS (23)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  0,				/* size of l1 cache */
  0,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (23),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),		/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (3),		/* variable shift costs */
  COSTS_N_INSNS (2),		/* constant shift costs */
  {COSTS_N_INSNS (12),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),		/*				 HI */
   COSTS_N_INSNS (12),		/*				 SI */
   COSTS_N_INSNS (12),		/*				 DI */
   COSTS_N_INSNS (12)},		/*			      other */
  1,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),		/*			    HI */
   COSTS_N_INSNS (40),		/*			    SI */
   COSTS_N_INSNS (40),		/*			    DI */
   COSTS_N_INSNS (40)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  15,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {8, 8, 8},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {8, 8, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  4,				/* size of l1 cache.  486 has 8kB cache
				   shared for code and data, so 4kB is
				   not really precise.  */
  4,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),		/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (11),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),		/*				 HI */
   COSTS_N_INSNS (11),		/*				 SI */
   COSTS_N_INSNS (11),		/*				 DI */
   COSTS_N_INSNS (11)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),		/*			    HI */
   COSTS_N_INSNS (25),		/*			    SI */
   COSTS_N_INSNS (25),		/*			    DI */
   COSTS_N_INSNS (25)},		/*			 other */
  COSTS_N_INSNS (3),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  6,				/* cost for loading QImode using movzbl */
  {2, 4, 2},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 4, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  8,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 8, 16},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 8, 16},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  8,				/* size of l2 cache */
  0,				/* size of prefetch block */
  0,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (4),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (4),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (4)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),		/*			    HI */
   COSTS_N_INSNS (17),		/*			    SI */
   COSTS_N_INSNS (17),		/*			    DI */
   COSTS_N_INSNS (17)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 2, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache */
  32,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (2),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (7),		/*				 SI */
   COSTS_N_INSNS (7),		/*				 DI */
   COSTS_N_INSNS (7)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),		/*			    HI */
   COSTS_N_INSNS (39),		/*			    SI */
   COSTS_N_INSNS (39),		/*			    DI */
   COSTS_N_INSNS (39)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  1,				/* cost for loading QImode using movzbl */
  {1, 1, 1},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {1, 1, 1},			/* cost of storing integer registers */
  1,				/* cost of reg,reg fld/fst */
  {1, 1, 1},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 6, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  1,				/* cost of moving MMX register */
  {1, 1},			/* cost of loading MMX registers
				   in SImode and DImode */
  {1, 1},			/* cost of storing MMX registers
				   in SImode and DImode */
  1,				/* cost of moving SSE register */
  {1, 1, 1},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {1, 1, 1},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  1,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  128,				/* size of l2 cache.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (3),		/*				 DI */
   COSTS_N_INSNS (3)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),		/*			    HI */
   COSTS_N_INSNS (18),		/*			    SI */
   COSTS_N_INSNS (18),		/*			    DI */
   COSTS_N_INSNS (18)},		/*			 other */
  COSTS_N_INSNS (2),		/* cost of movsx */
  COSTS_N_INSNS (2),		/* cost of movzx */
  8,				/* "large" insn */
  3,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {2, 2, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  6,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  32,				/* size of l2 cache.  Some models
				   have integrated l2 cache, but
				   optimizing for k6 is not important
				   enough to worry about that.  */
  32,				/* size of prefetch block */
  1,				/* number of parallel prefetches */
  COSTS_N_INSNS (2),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),		/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (5),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),		/*				 HI */
   COSTS_N_INSNS (5),		/*				 SI */
   COSTS_N_INSNS (5),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {4, 4},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 3, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks a loop is better.  For large blocks, a libcall can
     do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  5,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  3,				/* vec_unalign_load_cost.  */
  3,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  2,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				     MOVD reg64, xmmreg Double FSTORE 4
				     MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				     MOVD reg64, xmmreg Double FADD 3
				     MOVD reg32, xmmreg Double FADD 3 */
  64,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks a loop is better.  For large blocks, a libcall can
     do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (2),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (5)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),		/*			    HI */
   COSTS_N_INSNS (51),		/*			    SI */
   COSTS_N_INSNS (83),		/*			    DI */
   COSTS_N_INSNS (83)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  4,				/* cost for loading QImode using movzbl */
  {3, 4, 3},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {3, 4, 3},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {4, 4, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {3, 3},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {4, 4, 3},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 5},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  3,				/* MMX or SSE register to integer */
				/* On K8:
				     MOVD reg64, xmmreg Double FSTORE 4
				     MOVD reg32, xmmreg Double FSTORE 4
				   On AMDFAM10:
				     MOVD reg64, xmmreg Double FADD 3
				     MOVD reg32, xmmreg Double FADD 3 */
  64,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,				/* number of parallel prefetches */
  COSTS_N_INSNS (4),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),		/* cost of FSQRT instruction.  */
  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks a loop is better.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,				/* scalar_stmt_cost.  */
  2,				/* scalar load_cost.  */
  2,				/* scalar_store_cost.  */
  6,				/* vec_stmt_cost.  */
  0,				/* vec_to_scalar_cost.  */
  2,				/* scalar_to_vec_cost.  */
  2,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  2,				/* vec_store_cost.  */
  2,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (3),		/* cost of a lea instruction */
  COSTS_N_INSNS (4),		/* variable shift costs */
  COSTS_N_INSNS (4),		/* constant shift costs */
  {COSTS_N_INSNS (15),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),		/*				 HI */
   COSTS_N_INSNS (15),		/*				 SI */
   COSTS_N_INSNS (15),		/*				 DI */
   COSTS_N_INSNS (15)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),		/*			    HI */
   COSTS_N_INSNS (56),		/*			    SI */
   COSTS_N_INSNS (56),		/*			    DI */
   COSTS_N_INSNS (56)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  2,				/* cost for loading QImode using movzbl */
  {4, 5, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {2, 3, 2},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {2, 2, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 6},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {2, 2},			/* cost of loading MMX registers
				   in SImode and DImode */
  {2, 2},			/* cost of storing MMX registers
				   in SImode and DImode */
  12,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {2, 2, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  10,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  COSTS_N_INSNS (5),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1),		/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (10),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),		/*				 HI */
   COSTS_N_INSNS (10),		/*				 SI */
   COSTS_N_INSNS (10),		/*				 DI */
   COSTS_N_INSNS (10)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),		/*			    HI */
   COSTS_N_INSNS (66),		/*			    SI */
   COSTS_N_INSNS (66),		/*			    DI */
   COSTS_N_INSNS (66)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  16,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  3,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  6,				/* cost of moving MMX register */
  {12, 12},			/* cost of loading MMX registers
				   in SImode and DImode */
  {12, 12},			/* cost of storing MMX registers
				   in SImode and DImode */
  6,				/* cost of moving SSE register */
  {12, 12, 12},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {12, 12, 12},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  8,				/* MMX or SSE register to integer */
  8,				/* size of l1 cache.  */
  1024,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  1,				/* Branch cost */
  COSTS_N_INSNS (6),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),		/* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (3),		/*				 DI */
   COSTS_N_INSNS (3)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),		/*			    HI */
   COSTS_N_INSNS (22),		/*			    SI */
   COSTS_N_INSNS (22),		/*			    DI */
   COSTS_N_INSNS (22)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  16,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {6, 6, 6},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  2,				/* cost of reg,reg fld/fst */
  {6, 6, 6},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {4, 4, 4},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {6, 6},			/* cost of loading MMX registers
				   in SImode and DImode */
  {4, 4},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {6, 6, 6},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {4, 4, 4},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  2,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  2048,				/* size of l2 cache.  */
  128,				/* size of prefetch block */
  8,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (3),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  2,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  512,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  /* Benchmarking shows large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K6.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),		/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,	/* cost of a lea instruction */
  COSTS_N_INSNS (1),		/* variable shift costs */
  COSTS_N_INSNS (1),		/* constant shift costs */
  {COSTS_N_INSNS (3),		/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),		/*				 HI */
   COSTS_N_INSNS (3),		/*				 SI */
   COSTS_N_INSNS (4),		/*				 DI */
   COSTS_N_INSNS (2)},		/*			      other */
  0,				/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),		/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),		/*			    HI */
   COSTS_N_INSNS (42),		/*			    SI */
   COSTS_N_INSNS (74),		/*			    DI */
   COSTS_N_INSNS (74)},		/*			 other */
  COSTS_N_INSNS (1),		/* cost of movsx */
  COSTS_N_INSNS (1),		/* cost of movzx */
  8,				/* "large" insn */
  17,				/* MOVE_RATIO */
  4,				/* cost for loading QImode using movzbl */
  {4, 4, 4},			/* cost of loading integer registers
				   in QImode, HImode and SImode.
				   Relative to reg-reg move (2).  */
  {4, 4, 4},			/* cost of storing integer registers */
  4,				/* cost of reg,reg fld/fst */
  {12, 12, 12},			/* cost of loading fp registers
				   in SFmode, DFmode and XFmode */
  {6, 6, 8},			/* cost of storing fp registers
				   in SFmode, DFmode and XFmode */
  2,				/* cost of moving MMX register */
  {8, 8},			/* cost of loading MMX registers
				   in SImode and DImode */
  {8, 8},			/* cost of storing MMX registers
				   in SImode and DImode */
  2,				/* cost of moving SSE register */
  {8, 8, 8},			/* cost of loading SSE registers
				   in SImode, DImode and TImode */
  {8, 8, 8},			/* cost of storing SSE registers
				   in SImode, DImode and TImode */
  5,				/* MMX or SSE register to integer */
  32,				/* size of l1 cache.  */
  256,				/* size of l2 cache.  */
  64,				/* size of prefetch block */
  6,				/* number of parallel prefetches */
  3,				/* Branch cost */
  COSTS_N_INSNS (8),		/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),		/* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),		/* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),		/* cost of FABS instruction.  */
  COSTS_N_INSNS (8),		/* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),		/* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,				/* scalar_stmt_cost.  */
  1,				/* scalar load_cost.  */
  1,				/* scalar_store_cost.  */
  1,				/* vec_stmt_cost.  */
  1,				/* vec_to_scalar_cost.  */
  1,				/* scalar_to_vec_cost.  */
  1,				/* vec_align_load_cost.  */
  2,				/* vec_unalign_load_cost.  */
  1,				/* vec_store_cost.  */
  3,				/* cond_taken_branch_cost.  */
  1,				/* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
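
/* Illustrative sketch (an addition, not part of the original file): option
   processing repoints ix86_cost at the table matching the selected -mtune
   target, after which cost queries read its fields directly.  The helper
   below is hypothetical; the shift_var/shift_const field names are assumed
   to match the processor_costs declaration in i386.h.  */
static inline int
example_shift_cost (bool count_is_constant)
{
  return count_is_constant ? ix86_cost->shift_const : ix86_cost->shift_var;
}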
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
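
/* Illustrative sketch (an addition, not part of the original file): at
   option-processing time the masks in initial_ix86_tune_features below are
   collapsed into this array by testing each entry against the bit of the
   selected tuning target, roughly:

     unsigned int tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & tune_mask);

   so a test such as ix86_tune_features[X86_TUNE_USE_LEAVE] is a cheap byte
   load afterwards.  */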
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_SINGLE_POP: Enable if single pop insn is preferred
1484 over esp addition. */
1485 m_386 | m_486 | m_PENT | m_PPRO,
1487 /* X86_TUNE_DOUBLE_POP: Enable if double pop insn is preferred
1488 over esp addition. */
1491 /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
1492 over esp subtraction. */
1493 m_386 | m_486 | m_PENT | m_K6_GEODE,
1495 /* X86_TUNE_DOUBLE_PUSH: Enable if double push insn is preferred
1496 over esp subtraction. */
1497 m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4-based chips that treat 128bit
1509 SSE registers as single units and K8-based chips that divide SSE
1510 registers into two 64bit halves. This knob promotes all store destinations
1511 to be 128bit to allow register renaming on 128bit SSE units, but usually
1512 results in one extra microop on 64bit SSE units. Experimental results
1513 show that disabling this option on P4 brings over 20% SPECfp regression,
1514 while enabling it on K8 brings roughly 2.4% regression that can be partly
1515 masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short functions. */
1582 /* X86_TUNE_EXT_80387_CONSTANTS */
1583 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1584 | m_CORE2 | m_GENERIC,
1586 /* X86_TUNE_SHORTEN_X87_SSE */
1589 /* X86_TUNE_AVOID_VECTOR_DECODE */
1592 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1593 HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
1596 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1597 vector path on AMD machines. */
1598 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1600 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1601 machines. */
1602 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1604 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1605 than a MOV. */
1608 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1609 but one byte longer. */
1612 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1613 operand that cannot be represented using a modRM byte. The XOR
1614 replacement is long decoded, so this split helps here as well. */
1617 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1618 from FP to FP. */
1619 m_AMDFAM10 | m_GENERIC,
1621 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1622 from integer to FP. */
1625 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1626 with a subsequent conditional jump instruction into a single
1627 compare-and-branch uop. */
1630 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1631 will impact LEA instruction selection. */
1634 /* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
1635 instructions. */
1639 /* Feature tests against the various architecture variations. */
1640 unsigned char ix86_arch_features[X86_ARCH_LAST];
1642 /* Feature tests against the various architecture variations, used to create
1643 ix86_arch_features based on the processor mask. */
1644 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1645 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1646 ~(m_386 | m_486 | m_PENT | m_K6),
1648 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1651 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1654 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1657 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1661 static const unsigned int x86_accumulate_outgoing_args
1662 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1665 static const unsigned int x86_arch_always_fancy_math_387
1666 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1667 | m_NOCONA | m_CORE2 | m_GENERIC;
1669 static enum stringop_alg stringop_alg = no_stringop;
1671 /* In case the average insn count for a single function invocation is
1672 lower than this constant, emit a fast (but longer) prologue and
1673 epilogue. */
1674 #define FAST_PROLOGUE_INSN_COUNT 20
1676 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1677 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1678 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1679 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1681 /* Array of the smallest class containing reg number REGNO, indexed by
1682 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1684 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1686 /* ax, dx, cx, bx */
1687 AREG, DREG, CREG, BREG,
1688 /* si, di, bp, sp */
1689 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1691 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1692 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1695 /* flags, fpsr, fpcr, frame */
1696 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1698 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1701 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1704 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1705 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1706 /* SSE REX registers */
1707 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1711 /* The "default" register map used in 32bit mode. */
1713 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1715 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1716 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1717 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1718 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1719 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1720 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1721 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1724 /* The "default" register map used in 64bit mode. */
1726 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1728 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1729 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1730 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1731 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1732 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1733 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1734 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1737 /* Define the register numbers to be used in Dwarf debugging information.
1738 The SVR4 reference port C compiler uses the following register numbers
1739 in its Dwarf output code:
1740 0 for %eax (gcc regno = 0)
1741 1 for %ecx (gcc regno = 2)
1742 2 for %edx (gcc regno = 1)
1743 3 for %ebx (gcc regno = 3)
1744 4 for %esp (gcc regno = 7)
1745 5 for %ebp (gcc regno = 6)
1746 6 for %esi (gcc regno = 4)
1747 7 for %edi (gcc regno = 5)
1748 The following three DWARF register numbers are never generated by
1749 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1750 believes these numbers have these meanings.
1751 8 for %eip (no gcc equivalent)
1752 9 for %eflags (gcc regno = 17)
1753 10 for %trapno (no gcc equivalent)
1754 It is not at all clear how we should number the FP stack registers
1755 for the x86 architecture. If the version of SDB on x86/svr4 were
1756 a bit less brain dead with respect to floating-point then we would
1757 have a precedent to follow with respect to DWARF register numbers
1758 for x86 FP registers, but the SDB on x86/svr4 is so completely
1759 broken with respect to FP registers that it is hardly worth thinking
1760 of it as something to strive for compatibility with.
1761 The version of x86/svr4 SDB I have at the moment does (partially)
1762 seem to believe that DWARF register number 11 is associated with
1763 the x86 register %st(0), but that's about all. Higher DWARF
1764 register numbers don't seem to be associated with anything in
1765 particular, and even for DWARF regno 11, SDB only seems to under-
1766 stand that it should say that a variable lives in %st(0) (when
1767 asked via an `=' command) if we said it was in DWARF regno 11,
1768 but SDB still prints garbage when asked for the value of the
1769 variable in question (via a `/' command).
1770 (Also note that the labels SDB prints for various FP stack regs
1771 when doing an `x' command are all wrong.)
1772 Note that these problems generally don't affect the native SVR4
1773 C compiler because it doesn't allow the use of -O with -g and
1774 because when it is *not* optimizing, it allocates a memory
1775 location for each floating-point variable, and the memory
1776 location is what gets described in the DWARF AT_location
1777 attribute for the variable in question.
1778 Regardless of the severe mental illness of the x86/svr4 SDB, we
1779 do something sensible here and we use the following DWARF
1780 register numbers. Note that these are all stack-top-relative
1781 numbers:
1782 11 for %st(0) (gcc regno = 8)
1783 12 for %st(1) (gcc regno = 9)
1784 13 for %st(2) (gcc regno = 10)
1785 14 for %st(3) (gcc regno = 11)
1786 15 for %st(4) (gcc regno = 12)
1787 16 for %st(5) (gcc regno = 13)
1788 17 for %st(6) (gcc regno = 14)
1789 18 for %st(7) (gcc regno = 15)
1791 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1793 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1794 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1795 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1796 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1797 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1798 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1799 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1802 /* Define parameter passing and return registers. */
1804 static int const x86_64_int_parameter_registers[6] =
1806 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1809 static int const x86_64_ms_abi_int_parameter_registers[4] =
1811 CX_REG, DX_REG, R8_REG, R9_REG
1814 static int const x86_64_int_return_registers[4] =
1816 AX_REG, DX_REG, DI_REG, SI_REG
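/* Sketch of use (annotation, assuming the standard SysV psABI return
   rules): a 16-byte struct classified as two INTEGER eightbytes comes
   back in the first two entries, i.e. %rax:%rdx.  */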
1819 /* Define the structure for the machine field in struct function. */
1821 struct GTY(()) stack_local_entry {
1822 unsigned short mode;
1825 struct stack_local_entry *next;
1828 /* Structure describing stack frame layout.
1829 Stack grows downward:
1835 saved static chain if ix86_static_chain_on_stack
1837 saved frame pointer if frame_pointer_needed
1838 <- HARD_FRAME_POINTER
1844 [saved SSE regs] <- sse_regs_save_offset
1847 [va_arg registers] |
1851 [padding2] | = to_allocate */
1860 int outgoing_arguments_size;
1861 HOST_WIDE_INT frame;
1863 /* The offsets relative to ARG_POINTER. */
1864 HOST_WIDE_INT frame_pointer_offset;
1865 HOST_WIDE_INT hard_frame_pointer_offset;
1866 HOST_WIDE_INT stack_pointer_offset;
1867 HOST_WIDE_INT reg_save_offset;
1868 HOST_WIDE_INT sse_reg_save_offset;
1870 /* When save_regs_using_mov is set, emit prologue using
1871 move instead of push instructions. */
1872 bool save_regs_using_mov;
1875 /* Code model option. */
1876 enum cmodel ix86_cmodel;
1878 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1880 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1882 /* Which unit we are generating floating point math for. */
1883 enum fpmath_unit ix86_fpmath;
1885 /* Which cpu are we scheduling for. */
1886 enum attr_cpu ix86_schedule;
1888 /* Which cpu are we optimizing for. */
1889 enum processor_type ix86_tune;
1891 /* Which instruction set architecture to use. */
1892 enum processor_type ix86_arch;
1894 /* True if the sse prefetch instruction is not a NOP. */
1895 int x86_prefetch_sse;
1897 /* ix86_regparm_string as a number */
1898 static int ix86_regparm;
1900 /* -mstackrealign option */
1901 static const char ix86_force_align_arg_pointer_string[]
1902 = "force_align_arg_pointer";
1904 static rtx (*ix86_gen_leave) (void);
1905 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1908 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1909 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1910 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1911 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1912 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1913 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1915 /* Preferred alignment for stack boundary in bits. */
1916 unsigned int ix86_preferred_stack_boundary;
1918 /* Alignment for incoming stack boundary in bits specified at
1920 static unsigned int ix86_user_incoming_stack_boundary;
1922 /* Default alignment for incoming stack boundary in bits. */
1923 static unsigned int ix86_default_incoming_stack_boundary;
1925 /* Alignment for incoming stack boundary in bits. */
1926 unsigned int ix86_incoming_stack_boundary;
1928 /* The abi used by target. */
1929 enum calling_abi ix86_abi;
1931 /* Values 1-5: see jump.c */
1932 int ix86_branch_cost;
1934 /* Calling abi specific va_list type nodes. */
1935 static GTY(()) tree sysv_va_list_type_node;
1936 static GTY(()) tree ms_va_list_type_node;
1938 /* Variables which are this size or smaller are put in the data/bss
1939 or ldata/lbss sections. */
1941 int ix86_section_threshold = 65536;
1943 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1944 char internal_label_prefix[16];
1945 int internal_label_prefix_len;
1947 /* Fence to use after loop using movnt. */
1950 /* Register class used for passing a given 64bit part of the argument.
1951 These represent classes as documented by the psABI, with the exception
1952 of the SSESF and SSEDF classes, which are basically the SSE class: gcc
1953 will just use SF or DFmode moves instead of DImode to avoid reformatting penalties.
1955 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1956 whenever possible (the upper half does contain padding). */
1957 enum x86_64_reg_class
1960 X86_64_INTEGER_CLASS,
1961 X86_64_INTEGERSI_CLASS,
1968 X86_64_COMPLEX_X87_CLASS,
1972 #define MAX_CLASSES 4
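/* Illustrative classification (annotation; the full enumerator names are
   assumptions based on the SSESF/SSEDF comment above):

     struct { double d; int i; }   -- 16 bytes, two eightbytes
       -> { X86_64_SSEDF_CLASS, X86_64_INTEGERSI_CLASS }

   The second eightbyte holds the int plus 4 bytes of padding, which is
   exactly the case the INTEGERSI SImode-move trick above is for.  */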
1974 /* Table of constants used by fldpi, fldln2, etc.... */
1975 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1976 static bool ext_80387_constants_init = 0;
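/* Annotation: the five entries presumably correspond to the x87
   constant-load insns fldl2t, fldl2e, fldpi, fldlg2 and fldln2; the
   table is filled in lazily, guarded by ext_80387_constants_init.  */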
1979 static struct machine_function * ix86_init_machine_status (void);
1980 static rtx ix86_function_value (const_tree, const_tree, bool);
1981 static bool ix86_function_value_regno_p (const unsigned int);
1982 static rtx ix86_static_chain (const_tree, bool);
1983 static int ix86_function_regparm (const_tree, const_tree);
1984 static void ix86_compute_frame_layout (struct ix86_frame *);
1985 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1987 static void ix86_add_new_builtins (int);
1988 static rtx ix86_expand_vec_perm_builtin (tree);
1989 static tree ix86_canonical_va_list_type (tree);
1990 static void predict_jump (int);
1991 static unsigned int split_stack_prologue_scratch_regno (void);
1992 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
1994 enum ix86_function_specific_strings
1996 IX86_FUNCTION_SPECIFIC_ARCH,
1997 IX86_FUNCTION_SPECIFIC_TUNE,
1998 IX86_FUNCTION_SPECIFIC_FPMATH,
1999 IX86_FUNCTION_SPECIFIC_MAX
2002 static char *ix86_target_string (int, int, const char *, const char *,
2003 const char *, bool);
2004 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
2005 static void ix86_function_specific_save (struct cl_target_option *);
2006 static void ix86_function_specific_restore (struct cl_target_option *);
2007 static void ix86_function_specific_print (FILE *, int,
2008 struct cl_target_option *);
2009 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2010 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2011 static bool ix86_can_inline_p (tree, tree);
2012 static void ix86_set_current_function (tree);
2013 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2015 static enum calling_abi ix86_function_abi (const_tree);
2018 #ifndef SUBTARGET32_DEFAULT_CPU
2019 #define SUBTARGET32_DEFAULT_CPU "i386"
2022 /* The svr4 ABI for the i386 says that records and unions are returned
2024 #ifndef DEFAULT_PCC_STRUCT_RETURN
2025 #define DEFAULT_PCC_STRUCT_RETURN 1
2028 /* Whether -mtune= or -march= were specified */
2029 static int ix86_tune_defaulted;
2030 static int ix86_arch_specified;
2032 /* A mask of ix86_isa_flags that includes bit X if X
2033 was set or cleared on the command line. */
2034 static int ix86_isa_flags_explicit;
2036 /* Define a set of ISAs which are available when a given ISA is
2037 enabled. MMX and SSE ISAs are handled separately. */
2039 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2040 #define OPTION_MASK_ISA_3DNOW_SET \
2041 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2043 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2044 #define OPTION_MASK_ISA_SSE2_SET \
2045 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2046 #define OPTION_MASK_ISA_SSE3_SET \
2047 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2048 #define OPTION_MASK_ISA_SSSE3_SET \
2049 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2050 #define OPTION_MASK_ISA_SSE4_1_SET \
2051 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2052 #define OPTION_MASK_ISA_SSE4_2_SET \
2053 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2054 #define OPTION_MASK_ISA_AVX_SET \
2055 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2056 #define OPTION_MASK_ISA_FMA_SET \
2057 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2059 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
2060 as -msse4.2. */
2061 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2063 #define OPTION_MASK_ISA_SSE4A_SET \
2064 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2065 #define OPTION_MASK_ISA_FMA4_SET \
2066 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2067 | OPTION_MASK_ISA_AVX_SET)
2068 #define OPTION_MASK_ISA_XOP_SET \
2069 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2070 #define OPTION_MASK_ISA_LWP_SET \
2073 /* AES and PCLMUL need SSE2 because they use xmm registers */
2074 #define OPTION_MASK_ISA_AES_SET \
2075 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2076 #define OPTION_MASK_ISA_PCLMUL_SET \
2077 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2079 #define OPTION_MASK_ISA_ABM_SET \
2080 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2082 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2083 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2084 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2085 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2086 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2088 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2089 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2090 #define OPTION_MASK_ISA_F16C_SET \
2091 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
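/* For illustration (annotation): the chaining above means a single
   -msse4.2 transitively enables the whole SSE stack, e.g.

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
	| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
	| OPTION_MASK_ISA_SSE2  | OPTION_MASK_ISA_SSE.  */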
2093 /* Define a set of ISAs which aren't available when a given ISA is
2094 disabled. MMX and SSE ISAs are handled separately. */
2096 #define OPTION_MASK_ISA_MMX_UNSET \
2097 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2098 #define OPTION_MASK_ISA_3DNOW_UNSET \
2099 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2100 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2102 #define OPTION_MASK_ISA_SSE_UNSET \
2103 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2104 #define OPTION_MASK_ISA_SSE2_UNSET \
2105 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2106 #define OPTION_MASK_ISA_SSE3_UNSET \
2107 (OPTION_MASK_ISA_SSE3 \
2108 | OPTION_MASK_ISA_SSSE3_UNSET \
2109 | OPTION_MASK_ISA_SSE4A_UNSET )
2110 #define OPTION_MASK_ISA_SSSE3_UNSET \
2111 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2112 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2113 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2114 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2115 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2116 #define OPTION_MASK_ISA_AVX_UNSET \
2117 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2118 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2119 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2121 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2122 as -mno-sse4.1. */
2123 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2125 #define OPTION_MASK_ISA_SSE4A_UNSET \
2126 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2128 #define OPTION_MASK_ISA_FMA4_UNSET \
2129 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2130 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2131 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2133 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2134 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2135 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2136 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2137 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2138 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2139 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2140 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2142 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2143 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2144 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
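/* And in the other direction (annotation): -mno-sse3 clears SSE3 plus
   everything built on it.  Expanding the chain above,
   OPTION_MASK_ISA_SSE3_UNSET transitively covers SSSE3, SSE4.1, SSE4.2,
   AVX, FMA, F16C, SSE4A, FMA4 and XOP.  */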
2146 /* Vectorization library interface and handlers. */
2147 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2149 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2150 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2152 /* Processor target table, indexed by processor number */
2155 const struct processor_costs *cost; /* Processor costs */
2156 const int align_loop; /* Default alignments. */
2157 const int align_loop_max_skip;
2158 const int align_jump;
2159 const int align_jump_max_skip;
2160 const int align_func;
2163 static const struct ptt processor_target_table[PROCESSOR_max] =
2165 {&i386_cost, 4, 3, 4, 3, 4},
2166 {&i486_cost, 16, 15, 16, 15, 16},
2167 {&pentium_cost, 16, 7, 16, 7, 16},
2168 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2169 {&geode_cost, 0, 0, 0, 0, 0},
2170 {&k6_cost, 32, 7, 32, 7, 32},
2171 {&athlon_cost, 16, 7, 16, 7, 16},
2172 {&pentium4_cost, 0, 0, 0, 0, 0},
2173 {&k8_cost, 16, 7, 16, 7, 16},
2174 {&nocona_cost, 0, 0, 0, 0, 0},
2175 {&core2_cost, 16, 10, 16, 10, 16},
2176 {&generic32_cost, 16, 7, 16, 7, 16},
2177 {&generic64_cost, 16, 10, 16, 10, 16},
2178 {&amdfam10_cost, 32, 24, 32, 7, 32},
2179 {&bdver1_cost, 32, 24, 32, 7, 32},
2180 {&atom_cost, 16, 7, 16, 7, 16}
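/* Reading one row of the table (annotation): for -mtune=core2 the entry
   above selects core2_cost and 16-byte loop/jump alignment with a
   10-byte max skip; these align_* values become the defaults further
   down whenever no -falign-* option was given.  */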
2183 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2210 /* Return true if a red-zone is in use. */
2213 ix86_using_red_zone (void)
2215 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2218 /* Implement TARGET_HANDLE_OPTION. */
2221 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2241 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2246 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2247 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2257 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2262 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2263 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2270 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2275 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2276 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2283 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2288 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2289 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2296 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2301 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2302 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2309 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2314 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2315 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2322 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2327 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2328 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2335 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2340 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2341 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2348 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2353 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2364 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2376 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2377 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2384 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2389 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2390 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2397 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2402 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2403 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2410 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2415 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2416 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2423 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2428 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2429 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2436 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2441 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2442 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2449 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2454 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2455 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2462 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2467 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2468 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2475 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2480 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2481 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2488 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2493 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2494 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2501 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2506 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2507 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2514 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2519 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2520 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2527 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2532 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2533 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2540 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2545 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2546 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2553 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2558 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2559 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
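/* Annotation: every case above follows the same two-step pattern.  The
   SET/UNSET mask is applied to ix86_isa_flags, and the same mask is also
   recorded in ix86_isa_flags_explicit, so the -march defaulting code
   below knows the user made an explicit choice and leaves those bits
   alone.  */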
2568 /* Return a string that documents the current -m options. The caller is
2569 responsible for freeing the string. */
2572 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2573 const char *fpmath, bool add_nl_p)
2575 struct ix86_target_opts
2577 const char *option; /* option string */
2578 int mask; /* isa mask options */
2581 /* This table is ordered so that options like -msse4.2, which imply
2582 the options that follow them in the table, are matched first. */
2583 static struct ix86_target_opts isa_opts[] =
2585 { "-m64", OPTION_MASK_ISA_64BIT },
2586 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2587 { "-mfma", OPTION_MASK_ISA_FMA },
2588 { "-mxop", OPTION_MASK_ISA_XOP },
2589 { "-mlwp", OPTION_MASK_ISA_LWP },
2590 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2591 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2592 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2593 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2594 { "-msse3", OPTION_MASK_ISA_SSE3 },
2595 { "-msse2", OPTION_MASK_ISA_SSE2 },
2596 { "-msse", OPTION_MASK_ISA_SSE },
2597 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2598 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2599 { "-mmmx", OPTION_MASK_ISA_MMX },
2600 { "-mabm", OPTION_MASK_ISA_ABM },
2601 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2602 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2603 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2604 { "-maes", OPTION_MASK_ISA_AES },
2605 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2606 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2607 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2608 { "-mf16c", OPTION_MASK_ISA_F16C },
2612 static struct ix86_target_opts flag_opts[] =
2614 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2615 { "-m80387", MASK_80387 },
2616 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2617 { "-malign-double", MASK_ALIGN_DOUBLE },
2618 { "-mcld", MASK_CLD },
2619 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2620 { "-mieee-fp", MASK_IEEE_FP },
2621 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2622 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2623 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2624 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2625 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2626 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2627 { "-mno-red-zone", MASK_NO_RED_ZONE },
2628 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2629 { "-mrecip", MASK_RECIP },
2630 { "-mrtd", MASK_RTD },
2631 { "-msseregparm", MASK_SSEREGPARM },
2632 { "-mstack-arg-probe", MASK_STACK_PROBE },
2633 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2634 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2635 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2638 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2641 char target_other[40];
2650 memset (opts, '\0', sizeof (opts));
2652 /* Add -march= option. */
2655 opts[num][0] = "-march=";
2656 opts[num++][1] = arch;
2659 /* Add -mtune= option. */
2662 opts[num][0] = "-mtune=";
2663 opts[num++][1] = tune;
2666 /* Pick out the ISA options that are set. */
2667 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2669 if ((isa & isa_opts[i].mask) != 0)
2671 opts[num++][0] = isa_opts[i].option;
2672 isa &= ~ isa_opts[i].mask;
2676 if (isa && add_nl_p)
2678 opts[num++][0] = isa_other;
2679 sprintf (isa_other, "(other isa: %#x)", isa);
2682 /* Add flag options. */
2683 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2685 if ((flags & flag_opts[i].mask) != 0)
2687 opts[num++][0] = flag_opts[i].option;
2688 flags &= ~ flag_opts[i].mask;
2692 if (flags && add_nl_p)
2694 opts[num++][0] = target_other;
2695 sprintf (target_other, "(other flags: %#x)", flags);
2698 /* Add -fpmath= option. */
2701 opts[num][0] = "-mfpmath=";
2702 opts[num++][1] = fpmath;
2709 gcc_assert (num < ARRAY_SIZE (opts));
2711 /* Size the string. */
2713 sep_len = (add_nl_p) ? 3 : 1;
2714 for (i = 0; i < num; i++)
2717 for (j = 0; j < 2; j++)
2719 len += strlen (opts[i][j]);
2722 /* Build the string. */
2723 ret = ptr = (char *) xmalloc (len);
2726 for (i = 0; i < num; i++)
2730 for (j = 0; j < 2; j++)
2731 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2738 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2746 for (j = 0; j < 2; j++)
2749 memcpy (ptr, opts[i][j], len2[j]);
2751 line_len += len2[j];
2756 gcc_assert (ret + len >= ptr);
2761 /* Return TRUE if software prefetching is beneficial for the
2762 CPU we are tuning for. */
2765 software_prefetching_beneficial_p (void)
2769 case PROCESSOR_GEODE:
2771 case PROCESSOR_ATHLON:
2773 case PROCESSOR_AMDFAM10:
2781 /* Return true if profiling code should be emitted before the
2782 prologue; otherwise return false.
2783 Note: for x86 with "hotfix", a sorry () is issued. */
2785 ix86_profile_before_prologue (void)
2787 return flag_fentry != 0;
2790 /* Function that is callable from the debugger to print the current
2791 options. */
2793 ix86_debug_options (void)
2795 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2796 ix86_arch_string, ix86_tune_string,
2797 ix86_fpmath_string, true);
2801 fprintf (stderr, "%s\n\n", opts);
2805 fputs ("<no options>\n\n", stderr);
2810 /* Override various settings based on options. If MAIN_ARGS_P, the
2811 options are from the command line, otherwise they are from
2812 attribute(target). */
2815 ix86_option_override_internal (bool main_args_p)
2818 unsigned int ix86_arch_mask, ix86_tune_mask;
2819 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2824 /* Comes from final.c -- no real reason to change it. */
2825 #define MAX_CODE_ALIGN 16
2833 PTA_PREFETCH_SSE = 1 << 4,
2835 PTA_3DNOW_A = 1 << 6,
2839 PTA_POPCNT = 1 << 10,
2841 PTA_SSE4A = 1 << 12,
2842 PTA_NO_SAHF = 1 << 13,
2843 PTA_SSE4_1 = 1 << 14,
2844 PTA_SSE4_2 = 1 << 15,
2846 PTA_PCLMUL = 1 << 17,
2849 PTA_MOVBE = 1 << 20,
2853 PTA_FSGSBASE = 1 << 24,
2854 PTA_RDRND = 1 << 25,
2860 const char *const name; /* processor name or nickname. */
2861 const enum processor_type processor;
2862 const enum attr_cpu schedule;
2863 const unsigned /*enum pta_flags*/ flags;
2865 const processor_alias_table[] =
2867 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2868 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2869 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2870 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2871 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2872 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2873 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2874 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2875 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2876 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2877 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2878 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2879 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2883 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2},
2889 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2891 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_CX16 | PTA_NO_SAHF},
2894 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16},
2897 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2898 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2899 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2900 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2902 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2903 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2904 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2905 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2909 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2914 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2915 {"x86-64", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_NO_SAHF},
2920 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2923 {"opteron", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_NO_SAHF},
2926 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2929 {"athlon64", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_NO_SAHF},
2932 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2935 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_NO_SAHF},
2938 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2944 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2945 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2946 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2947 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2948 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2949 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2950 0 /* flags are only used for -march switch. */ },
2951 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2952 PTA_64BIT /* flags are only used for -march switch. */ },
2955 int const pta_size = ARRAY_SIZE (processor_alias_table);
2957 /* Set up prefix/suffix so the error messages refer to either the command
2958 line argument, or the attribute(target). */
2967 prefix = "option(\"";
2972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2973 SUBTARGET_OVERRIDE_OPTIONS;
2976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2977 SUBSUBTARGET_OVERRIDE_OPTIONS;
2980 /* -fPIC is the default for x86_64. */
2981 if (TARGET_MACHO && TARGET_64BIT)
2984 /* Need to check -mtune=generic first. */
2985 if (ix86_tune_string)
2987 if (!strcmp (ix86_tune_string, "generic")
2988 || !strcmp (ix86_tune_string, "i686")
2989 /* As special support for cross compilers we read -mtune=native
2990 as -mtune=generic. With native compilers we won't see
2991 -mtune=native, as it will have been changed by the driver. */
2992 || !strcmp (ix86_tune_string, "native"))
2995 ix86_tune_string = "generic64";
2997 ix86_tune_string = "generic32";
2999 /* If this call is for setting the option attribute, allow the
3000 generic32/generic64 that was previously set. */
3001 else if (!main_args_p
3002 && (!strcmp (ix86_tune_string, "generic32")
3003 || !strcmp (ix86_tune_string, "generic64")))
3005 else if (!strncmp (ix86_tune_string, "generic", 7))
3006 error ("bad value (%s) for %stune=%s %s",
3007 ix86_tune_string, prefix, suffix, sw);
3008 else if (!strcmp (ix86_tune_string, "x86-64"))
3009 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3010 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3011 prefix, suffix, prefix, suffix, prefix, suffix);
3015 if (ix86_arch_string)
3016 ix86_tune_string = ix86_arch_string;
3017 if (!ix86_tune_string)
3019 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3020 ix86_tune_defaulted = 1;
3023 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3024 need to use a sensible tune option. */
3025 if (!strcmp (ix86_tune_string, "generic")
3026 || !strcmp (ix86_tune_string, "x86-64")
3027 || !strcmp (ix86_tune_string, "i686"))
3030 ix86_tune_string = "generic64";
3032 ix86_tune_string = "generic32";
3036 if (ix86_stringop_string)
3038 if (!strcmp (ix86_stringop_string, "rep_byte"))
3039 stringop_alg = rep_prefix_1_byte;
3040 else if (!strcmp (ix86_stringop_string, "libcall"))
3041 stringop_alg = libcall;
3042 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3043 stringop_alg = rep_prefix_4_byte;
3044 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3046 /* rep; movq isn't available in 32-bit code. */
3047 stringop_alg = rep_prefix_8_byte;
3048 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3049 stringop_alg = loop_1_byte;
3050 else if (!strcmp (ix86_stringop_string, "loop"))
3051 stringop_alg = loop;
3052 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3053 stringop_alg = unrolled_loop;
3055 error ("bad value (%s) for %sstringop-strategy=%s %s",
3056 ix86_stringop_string, prefix, suffix, sw);
3059 if (!ix86_arch_string)
3060 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3062 ix86_arch_specified = 1;
3064 /* Validate -mabi= value. */
3065 if (ix86_abi_string)
3067 if (strcmp (ix86_abi_string, "sysv") == 0)
3068 ix86_abi = SYSV_ABI;
3069 else if (strcmp (ix86_abi_string, "ms") == 0)
3072 error ("unknown ABI (%s) for %sabi=%s %s",
3073 ix86_abi_string, prefix, suffix, sw);
3076 ix86_abi = DEFAULT_ABI;
3078 if (ix86_cmodel_string != 0)
3080 if (!strcmp (ix86_cmodel_string, "small"))
3081 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3082 else if (!strcmp (ix86_cmodel_string, "medium"))
3083 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3084 else if (!strcmp (ix86_cmodel_string, "large"))
3085 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3087 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3088 else if (!strcmp (ix86_cmodel_string, "32"))
3089 ix86_cmodel = CM_32;
3090 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3091 ix86_cmodel = CM_KERNEL;
3093 error ("bad value (%s) for %scmodel=%s %s",
3094 ix86_cmodel_string, prefix, suffix, sw);
3098 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3099 use of rip-relative addressing. This eliminates fixups that
3100 would otherwise be needed if this object is to be placed in a
3101 DLL, and is essentially just as efficient as direct addressing. */
3102 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3103 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3104 else if (TARGET_64BIT)
3105 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3107 ix86_cmodel = CM_32;
3109 if (ix86_asm_string != 0)
3112 && !strcmp (ix86_asm_string, "intel"))
3113 ix86_asm_dialect = ASM_INTEL;
3114 else if (!strcmp (ix86_asm_string, "att"))
3115 ix86_asm_dialect = ASM_ATT;
3117 error ("bad value (%s) for %sasm=%s %s",
3118 ix86_asm_string, prefix, suffix, sw);
3120 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3121 error ("code model %qs not supported in the %s bit mode",
3122 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3123 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3124 sorry ("%i-bit mode not compiled in",
3125 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3127 for (i = 0; i < pta_size; i++)
3128 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3130 ix86_schedule = processor_alias_table[i].schedule;
3131 ix86_arch = processor_alias_table[i].processor;
3132 /* Default cpu tuning to the architecture. */
3133 ix86_tune = ix86_arch;
3135 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3136 error ("CPU you selected does not support x86-64 "
3139 if (processor_alias_table[i].flags & PTA_MMX
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3141 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3142 if (processor_alias_table[i].flags & PTA_3DNOW
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3145 if (processor_alias_table[i].flags & PTA_3DNOW_A
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3147 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3148 if (processor_alias_table[i].flags & PTA_SSE
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3151 if (processor_alias_table[i].flags & PTA_SSE2
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3154 if (processor_alias_table[i].flags & PTA_SSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3157 if (processor_alias_table[i].flags & PTA_SSSE3
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3160 if (processor_alias_table[i].flags & PTA_SSE4_1
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3163 if (processor_alias_table[i].flags & PTA_SSE4_2
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3166 if (processor_alias_table[i].flags & PTA_AVX
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3168 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3169 if (processor_alias_table[i].flags & PTA_FMA
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3171 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3172 if (processor_alias_table[i].flags & PTA_SSE4A
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3175 if (processor_alias_table[i].flags & PTA_FMA4
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3177 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3178 if (processor_alias_table[i].flags & PTA_XOP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3181 if (processor_alias_table[i].flags & PTA_LWP
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3183 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3184 if (processor_alias_table[i].flags & PTA_ABM
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3186 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3187 if (processor_alias_table[i].flags & PTA_CX16
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3189 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3190 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3192 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3193 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3195 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3196 if (processor_alias_table[i].flags & PTA_MOVBE
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3198 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3199 if (processor_alias_table[i].flags & PTA_AES
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3201 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3202 if (processor_alias_table[i].flags & PTA_PCLMUL
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3204 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3205 if (processor_alias_table[i].flags & PTA_FSGSBASE
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3207 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3208 if (processor_alias_table[i].flags & PTA_RDRND
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3210 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3211 if (processor_alias_table[i].flags & PTA_F16C
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3213 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3214 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3215 x86_prefetch_sse = true;
3220 if (!strcmp (ix86_arch_string, "generic"))
3221 error ("generic CPU can be used only for %stune=%s %s",
3222 prefix, suffix, sw);
3223 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3224 error ("bad value (%s) for %sarch=%s %s",
3225 ix86_arch_string, prefix, suffix, sw);
3227 ix86_arch_mask = 1u << ix86_arch;
3228 for (i = 0; i < X86_ARCH_LAST; ++i)
3229 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
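/* Example (annotation): for -march=core2, ix86_arch_mask is
   1 << PROCESSOR_CORE2, so X86_ARCH_CMOVE comes out set because CORE2
   is not in the ~(m_386 | m_486 | m_PENT | m_K6) exclusion above.  */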
3231 for (i = 0; i < pta_size; i++)
3232 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3234 ix86_schedule = processor_alias_table[i].schedule;
3235 ix86_tune = processor_alias_table[i].processor;
3236 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3238 if (ix86_tune_defaulted)
3240 ix86_tune_string = "x86-64";
3241 for (i = 0; i < pta_size; i++)
3242 if (! strcmp (ix86_tune_string,
3243 processor_alias_table[i].name))
3245 ix86_schedule = processor_alias_table[i].schedule;
3246 ix86_tune = processor_alias_table[i].processor;
3249 error ("CPU you selected does not support x86-64 "
3252 /* Intel CPUs have always interpreted SSE prefetch instructions as
3253 NOPs; so, we can enable SSE prefetch instructions even when
3254 -mtune (rather than -march) points us to a processor that has them.
3255 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3256 higher processors. */
3258 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3259 x86_prefetch_sse = true;
3263 if (ix86_tune_specified && i == pta_size)
3264 error ("bad value (%s) for %stune=%s %s",
3265 ix86_tune_string, prefix, suffix, sw);
3267 ix86_tune_mask = 1u << ix86_tune;
3268 for (i = 0; i < X86_TUNE_LAST; ++i)
3269 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3271 #ifndef USE_IX86_FRAME_POINTER
3272 #define USE_IX86_FRAME_POINTER 0
3275 /* Set the default values for switches whose default depends on TARGET_64BIT
3276 in case they weren't overwritten by command line options. */
3281 if (flag_omit_frame_pointer == 2)
3282 flag_omit_frame_pointer = 1;
3283 if (flag_asynchronous_unwind_tables == 2)
3284 flag_asynchronous_unwind_tables = 1;
3285 if (flag_pcc_struct_return == 2)
3286 flag_pcc_struct_return = 0;
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3294 if (flag_asynchronous_unwind_tables == 2)
3295 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3296 if (flag_pcc_struct_return == 2)
3297 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3301 ix86_cost = &ix86_size_cost;
3303 ix86_cost = processor_target_table[ix86_tune].cost;
3305 /* Arrange to set up i386_stack_locals for all functions. */
3306 init_machine_status = ix86_init_machine_status;
3308 /* Validate -mregparm= value. */
3309 if (ix86_regparm_string)
3312 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3313 i = atoi (ix86_regparm_string);
3314 if (i < 0 || i > REGPARM_MAX)
3315 error ("%sregparm=%d%s is not between 0 and %d",
3316 prefix, i, suffix, REGPARM_MAX);
3321 ix86_regparm = REGPARM_MAX;
3323 /* If the user has provided any of the -malign-* options,
3324 warn and use that value only if -falign-* is not set.
3325 Remove this code in GCC 3.2 or later. */
3326 if (ix86_align_loops_string)
3328 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3329 prefix, suffix, suffix);
3330 if (align_loops == 0)
3332 i = atoi (ix86_align_loops_string);
3333 if (i < 0 || i > MAX_CODE_ALIGN)
3334 error ("%salign-loops=%d%s is not between 0 and %d",
3335 prefix, i, suffix, MAX_CODE_ALIGN);
3337 align_loops = 1 << i;
3341 if (ix86_align_jumps_string)
3343 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3344 prefix, suffix, suffix);
3345 if (align_jumps == 0)
3347 i = atoi (ix86_align_jumps_string);
3348 if (i < 0 || i > MAX_CODE_ALIGN)
3349 error ("%salign-loops=%d%s is not between 0 and %d",
3350 prefix, i, suffix, MAX_CODE_ALIGN);
3352 align_jumps = 1 << i;
3356 if (ix86_align_funcs_string)
3358 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3359 prefix, suffix, suffix);
3360 if (align_functions == 0)
3362 i = atoi (ix86_align_funcs_string);
3363 if (i < 0 || i > MAX_CODE_ALIGN)
3364 error ("%salign-loops=%d%s is not between 0 and %d",
3365 prefix, i, suffix, MAX_CODE_ALIGN);
3367 align_functions = 1 << i;
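/* Annotation: the option value is a power-of-two exponent, e.g.
   -malign-functions=4 yields align_functions = 1 << 4 = 16 bytes.  */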
3371 /* Default align_* from the processor table. */
3372 if (align_loops == 0)
3374 align_loops = processor_target_table[ix86_tune].align_loop;
3375 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3377 if (align_jumps == 0)
3379 align_jumps = processor_target_table[ix86_tune].align_jump;
3380 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3382 if (align_functions == 0)
3384 align_functions = processor_target_table[ix86_tune].align_func;
3387 /* Validate -mbranch-cost= value, or provide default. */
3388 ix86_branch_cost = ix86_cost->branch_cost;
3389 if (ix86_branch_cost_string)
3391 i = atoi (ix86_branch_cost_string);
3393 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3395 ix86_branch_cost = i;
3397 if (ix86_section_threshold_string)
3399 i = atoi (ix86_section_threshold_string);
3401 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3403 ix86_section_threshold = i;
3406 if (ix86_tls_dialect_string)
3408 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU;
3410 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3411 ix86_tls_dialect = TLS_DIALECT_GNU2;
3413 error ("bad value (%s) for %stls-dialect=%s %s",
3414 ix86_tls_dialect_string, prefix, suffix, sw);
3417 if (ix87_precision_string)
3419 i = atoi (ix87_precision_string);
3420 if (i != 32 && i != 64 && i != 80)
3421 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3426 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3428 /* Enable by default the SSE and MMX builtins. Do allow the user to
3429 explicitly disable any of these. In particular, disabling SSE and
3430 MMX for kernel code is extremely useful. */
3431 if (!ix86_arch_specified)
3433 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3434 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3437 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3441 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3443 if (!ix86_arch_specified)
3445 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3447 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3448 when the programmer takes care to keep the stack from being destroyed. */
3449 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3450 target_flags |= MASK_NO_RED_ZONE;
3453 /* Keep nonleaf frame pointers. */
3454 if (flag_omit_frame_pointer)
3455 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3456 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3457 flag_omit_frame_pointer = 1;
3459 /* If we're doing fast math, we don't care about comparison order
3460 wrt NaNs. This lets us use a shorter comparison sequence. */
3461 if (flag_finite_math_only)
3462 target_flags &= ~MASK_IEEE_FP;
3464 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3465 since the insns won't need emulation. */
3466 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3467 target_flags &= ~MASK_NO_FANCY_MATH_387;
3469 /* Likewise, if the target doesn't have a 387, or we've specified
3470 software floating point, don't use 387 inline intrinsics. */
3472 target_flags |= MASK_NO_FANCY_MATH_387;
3474 /* Turn on MMX builtins for -msse. */
3477 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3478 x86_prefetch_sse = true;
3481 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3482 if (TARGET_SSE4_2 || TARGET_ABM)
3483 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3485 /* Validate -mpreferred-stack-boundary= value or default it to
3486 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3487 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3488 if (ix86_preferred_stack_boundary_string)
3490 i = atoi (ix86_preferred_stack_boundary_string);
3491 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3492 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3493 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3495 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
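/* Worked example (illustrative): -mpreferred-stack-boundary is likewise a
   log2 value, converted to bits here.  With -mpreferred-stack-boundary=4:

     ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128

   i.e. a 16-byte-aligned stack, the alignment the x86-64 psABI requires. */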
3498 /* Set the default value for -mstackrealign. */
3499 if (ix86_force_align_arg_pointer == -1)
3500 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3502 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3504 /* Validate -mincoming-stack-boundary= value or default it to
3505 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3506 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3507 if (ix86_incoming_stack_boundary_string)
3509 i = atoi (ix86_incoming_stack_boundary_string);
3510 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3511 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3512 i, TARGET_64BIT ? 4 : 2);
3515 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3516 ix86_incoming_stack_boundary
3517 = ix86_user_incoming_stack_boundary;
3521 /* Accept -msseregparm only if at least SSE support is enabled. */
3522 if (TARGET_SSEREGPARM
3524 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3526 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3527 if (ix86_fpmath_string != 0)
3529 if (! strcmp (ix86_fpmath_string, "387"))
3530 ix86_fpmath = FPMATH_387;
3531 else if (! strcmp (ix86_fpmath_string, "sse"))
3535 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3536 ix86_fpmath = FPMATH_387;
3539 ix86_fpmath = FPMATH_SSE;
3541 else if (! strcmp (ix86_fpmath_string, "387,sse")
3542 || ! strcmp (ix86_fpmath_string, "387+sse")
3543 || ! strcmp (ix86_fpmath_string, "sse,387")
3544 || ! strcmp (ix86_fpmath_string, "sse+387")
3545 || ! strcmp (ix86_fpmath_string, "both"))
3549 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3550 ix86_fpmath = FPMATH_387;
3552 else if (!TARGET_80387)
3554 warning (0, "387 instruction set disabled, using SSE arithmetic");
3555 ix86_fpmath = FPMATH_SSE;
3558 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3561 error ("bad value (%s) for %sfpmath=%s %s",
3562 ix86_fpmath_string, prefix, suffix, sw);
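/* Usage sketch (illustrative): the accepted -mfpmath= spellings map onto the
   FPMATH_* bits handled above, e.g.

     gcc -m32 -msse2 -mfpmath=sse   =>  ix86_fpmath = FPMATH_SSE
     gcc -m32 -msse2 -mfpmath=both  =>  ix86_fpmath = FPMATH_SSE | FPMATH_387

   selecting which unit(s) scalar floating-point arithmetic is generated for. */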
3565 /* If the i387 is disabled, then do not return values in it. */
3567 target_flags &= ~MASK_FLOAT_RETURNS;
3569 /* Use external vectorized library in vectorizing intrinsics. */
3570 if (ix86_veclibabi_string)
3572 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_svml;
3574 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3575 ix86_veclib_handler = ix86_veclibabi_acml;
3577 error ("unknown vectorization library ABI type (%s) for "
3578 "%sveclibabi=%s %s", ix86_veclibabi_string,
3579 prefix, suffix, sw);
3582 if ((!USE_IX86_FRAME_POINTER
3583 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3584 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3588 /* ??? Unwind info is not correct around the CFG unless either a frame
3589 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3590 unwind info generation to be aware of the CFG and propagating states around edges. */
3592 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3593 || flag_exceptions || flag_non_call_exceptions)
3594 && flag_omit_frame_pointer
3595 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3597 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3598 warning (0, "unwind tables currently require either a frame pointer "
3599 "or %saccumulate-outgoing-args%s for correctness",
3601 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3604 /* If stack probes are required, the space used for large function
3605 arguments on the stack must also be probed, so enable
3606 -maccumulate-outgoing-args so this happens in the prologue. */
3607 if (TARGET_STACK_PROBE
3608 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3610 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3611 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3612 "for correctness", prefix, suffix);
3613 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3616 /* For sane SSE instruction set generation we need the fcomi instruction.
3617 It is safe to enable all CMOV instructions. */
3621 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3624 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3625 p = strchr (internal_label_prefix, 'X');
3626 internal_label_prefix_len = p - internal_label_prefix;
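/* A minimal sketch of what this computes, assuming the default ELF-style
   ASM_GENERATE_INTERNAL_LABEL from elfos.h:

     char buf[16];
     ASM_GENERATE_INTERNAL_LABEL (buf, "LX", 0);

   yields buf = "*.LX0", so internal_label_prefix becomes "*.L" and
   internal_label_prefix_len becomes 3.  Later code uses the prefix to
   recognize compiler-internal labels. */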
3630 /* When a scheduling description is not available, disable the scheduler pass
3631 so it won't slow down compilation and make x87 code slower. */
3632 if (!TARGET_SCHEDULE)
3633 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3635 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3636 ix86_cost->simultaneous_prefetches,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3639 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, ix86_cost->prefetch_block,
3640 global_options.x_param_values,
3641 global_options_set.x_param_values);
3642 maybe_set_param_value (PARAM_L1_CACHE_SIZE, ix86_cost->l1_cache_size,
3643 global_options.x_param_values,
3644 global_options_set.x_param_values);
3645 maybe_set_param_value (PARAM_L2_CACHE_SIZE, ix86_cost->l2_cache_size,
3646 global_options.x_param_values,
3647 global_options_set.x_param_values);
3649 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3650 if (flag_prefetch_loop_arrays < 0
3653 && software_prefetching_beneficial_p ())
3654 flag_prefetch_loop_arrays = 1;
3656 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3657 can be optimized to ap = __builtin_next_arg (0). */
3658 if (!TARGET_64BIT && !flag_split_stack)
3659 targetm.expand_builtin_va_start = NULL;
3663 ix86_gen_leave = gen_leave_rex64;
3664 ix86_gen_add3 = gen_adddi3;
3665 ix86_gen_sub3 = gen_subdi3;
3666 ix86_gen_sub3_carry = gen_subdi3_carry;
3667 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3668 ix86_gen_monitor = gen_sse3_monitor64;
3669 ix86_gen_andsp = gen_anddi3;
3670 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
3671 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3672 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3676 ix86_gen_leave = gen_leave;
3677 ix86_gen_add3 = gen_addsi3;
3678 ix86_gen_sub3 = gen_subsi3;
3679 ix86_gen_sub3_carry = gen_subsi3_carry;
3680 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3681 ix86_gen_monitor = gen_sse3_monitor;
3682 ix86_gen_andsp = gen_andsi3;
3683 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
3684 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3685 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3689 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3691 target_flags |= MASK_CLD & ~target_flags_explicit;
3694 if (!TARGET_64BIT && flag_pic)
3696 if (flag_fentry > 0)
3697 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3700 if (flag_fentry < 0)
3702 #if defined(PROFILE_BEFORE_PROLOGUE)
3709 /* Save the initial options in case the user uses function-specific options. */
3711 target_option_default_node = target_option_current_node
3712 = build_target_option_node ();
3715 /* Implement the TARGET_OPTION_OVERRIDE hook. */
3718 ix86_option_override (void)
3720 ix86_option_override_internal (true);
3723 /* Update register usage after having seen the compiler flags. */
3726 ix86_conditional_register_usage (void)
3731 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3733 if (fixed_regs[i] > 1)
3734 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3735 if (call_used_regs[i] > 1)
3736 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3739 /* The PIC register, if it exists, is fixed. */
3740 j = PIC_OFFSET_TABLE_REGNUM;
3741 if (j != INVALID_REGNUM)
3742 fixed_regs[j] = call_used_regs[j] = 1;
3744 /* The MS_ABI changes the set of call-used registers. */
3745 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3747 call_used_regs[SI_REG] = 0;
3748 call_used_regs[DI_REG] = 0;
3749 call_used_regs[XMM6_REG] = 0;
3750 call_used_regs[XMM7_REG] = 0;
3751 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3752 call_used_regs[i] = 0;
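/* Background note (illustrative): the Microsoft x64 convention makes rsi,
   rdi and xmm6-xmm15 callee-saved, unlike the SysV ABI where they are
   call-clobbered.  Clearing call_used_regs for them means that in

     extern __attribute__((ms_abi)) void callee (void);
     __attribute__((ms_abi)) void caller (void) { callee (); }

   the register allocator may keep a value live in %rsi across the call
   without spilling it. */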
3755 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3756 other call-clobbered regs for 64-bit. */
3759 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3761 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3762 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3763 && call_used_regs[i])
3764 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3767 /* If MMX is disabled, squash the registers. */
3769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3770 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3771 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3773 /* If SSE is disabled, squash the registers. */
3775 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3776 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3777 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3779 /* If the FPU is disabled, squash the registers. */
3780 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3781 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3782 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3783 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3785 /* If 32-bit, squash the 64-bit registers. */
3788 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3790 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3796 /* Save the current options. */
3799 ix86_function_specific_save (struct cl_target_option *ptr)
3801 ptr->arch = ix86_arch;
3802 ptr->schedule = ix86_schedule;
3803 ptr->tune = ix86_tune;
3804 ptr->fpmath = ix86_fpmath;
3805 ptr->branch_cost = ix86_branch_cost;
3806 ptr->tune_defaulted = ix86_tune_defaulted;
3807 ptr->arch_specified = ix86_arch_specified;
3808 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3809 ptr->ix86_target_flags_explicit = target_flags_explicit;
3811 /* The fields are char but the variables are not; make sure the
3812 values fit in the fields. */
3813 gcc_assert (ptr->arch == ix86_arch);
3814 gcc_assert (ptr->schedule == ix86_schedule);
3815 gcc_assert (ptr->tune == ix86_tune);
3816 gcc_assert (ptr->fpmath == ix86_fpmath);
3817 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3820 /* Restore the current options. */
3823 ix86_function_specific_restore (struct cl_target_option *ptr)
3825 enum processor_type old_tune = ix86_tune;
3826 enum processor_type old_arch = ix86_arch;
3827 unsigned int ix86_arch_mask, ix86_tune_mask;
3830 ix86_arch = (enum processor_type) ptr->arch;
3831 ix86_schedule = (enum attr_cpu) ptr->schedule;
3832 ix86_tune = (enum processor_type) ptr->tune;
3833 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3834 ix86_branch_cost = ptr->branch_cost;
3835 ix86_tune_defaulted = ptr->tune_defaulted;
3836 ix86_arch_specified = ptr->arch_specified;
3837 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3838 target_flags_explicit = ptr->ix86_target_flags_explicit;
3840 /* Recreate the arch feature tests if the arch changed */
3841 if (old_arch != ix86_arch)
3843 ix86_arch_mask = 1u << ix86_arch;
3844 for (i = 0; i < X86_ARCH_LAST; ++i)
3845 ix86_arch_features[i]
3846 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3849 /* Recreate the tune optimization tests */
3850 if (old_tune != ix86_tune)
3852 ix86_tune_mask = 1u << ix86_tune;
3853 for (i = 0; i < X86_TUNE_LAST; ++i)
3854 ix86_tune_features[i]
3855 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3859 /* Print the current options. */
3862 ix86_function_specific_print (FILE *file, int indent,
3863 struct cl_target_option *ptr)
3866 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
3867 NULL, NULL, NULL, false);
3869 fprintf (file, "%*sarch = %d (%s)\n",
3872 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3873 ? cpu_names[ptr->arch]
3876 fprintf (file, "%*stune = %d (%s)\n",
3879 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3880 ? cpu_names[ptr->tune]
3883 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3884 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3885 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3886 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3890 fprintf (file, "%*s%s\n", indent, "", target_string);
3891 free (target_string);
3896 /* Inner function to process the attribute((target(...))); take an argument and
3897 set the current options from the argument. If we have a list, recursively go over the list. */
3901 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3906 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3907 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3908 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3909 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3924 enum ix86_opt_type type;
3929 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3930 IX86_ATTR_ISA ("abm", OPT_mabm),
3931 IX86_ATTR_ISA ("aes", OPT_maes),
3932 IX86_ATTR_ISA ("avx", OPT_mavx),
3933 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3934 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3935 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3936 IX86_ATTR_ISA ("sse", OPT_msse),
3937 IX86_ATTR_ISA ("sse2", OPT_msse2),
3938 IX86_ATTR_ISA ("sse3", OPT_msse3),
3939 IX86_ATTR_ISA ("sse4", OPT_msse4),
3940 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3941 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3942 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3943 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3944 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3945 IX86_ATTR_ISA ("xop", OPT_mxop),
3946 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3947 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3948 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3949 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3951 /* string options */
3952 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3953 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3954 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3957 IX86_ATTR_YES ("cld",
3961 IX86_ATTR_NO ("fancy-math-387",
3962 OPT_mfancy_math_387,
3963 MASK_NO_FANCY_MATH_387),
3965 IX86_ATTR_YES ("ieee-fp",
3969 IX86_ATTR_YES ("inline-all-stringops",
3970 OPT_minline_all_stringops,
3971 MASK_INLINE_ALL_STRINGOPS),
3973 IX86_ATTR_YES ("inline-stringops-dynamically",
3974 OPT_minline_stringops_dynamically,
3975 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3977 IX86_ATTR_NO ("align-stringops",
3978 OPT_mno_align_stringops,
3979 MASK_NO_ALIGN_STRINGOPS),
3981 IX86_ATTR_YES ("recip",
3987 /* If this is a list, recurse to get the options. */
3988 if (TREE_CODE (args) == TREE_LIST)
3992 for (; args; args = TREE_CHAIN (args))
3993 if (TREE_VALUE (args)
3994 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
4000 else if (TREE_CODE (args) != STRING_CST)
4003 /* Handle multiple arguments separated by commas. */
4004 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4006 while (next_optstr && *next_optstr != '\0')
4008 char *p = next_optstr;
4010 char *comma = strchr (next_optstr, ',');
4011 const char *opt_string;
4012 size_t len, opt_len;
4017 enum ix86_opt_type type = ix86_opt_unknown;
4023 len = comma - next_optstr;
4024 next_optstr = comma + 1;
4032 /* Recognize no-xxx. */
4033 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4042 /* Find the option. */
4045 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4047 type = attrs[i].type;
4048 opt_len = attrs[i].len;
4049 if (ch == attrs[i].string[0]
4050 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4051 && memcmp (p, attrs[i].string, opt_len) == 0)
4054 mask = attrs[i].mask;
4055 opt_string = attrs[i].string;
4060 /* Process the option. */
4063 error ("attribute(target(\"%s\")) is unknown", orig_p);
4067 else if (type == ix86_opt_isa)
4068 ix86_handle_option (opt, p, opt_set_p);
4070 else if (type == ix86_opt_yes || type == ix86_opt_no)
4072 if (type == ix86_opt_no)
4073 opt_set_p = !opt_set_p;
4076 target_flags |= mask;
4078 target_flags &= ~mask;
4081 else if (type == ix86_opt_str)
4085 error ("option(\"%s\") was already specified", opt_string);
4089 p_strings[opt] = xstrdup (p + opt_len);
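/* Usage sketch (illustrative): the strings parsed here come from user code
   such as

     __attribute__((target ("sse4.2,no-fancy-math-387")))
     int dot (const int *a, const int *b, int n);

   "sse4.2" matches the IX86_ATTR_ISA table and enables the ISA flag, while
   the leading "no-" on the second option inverts opt_set_p before the mask
   is applied. */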
4099 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4102 ix86_valid_target_attribute_tree (tree args)
4104 const char *orig_arch_string = ix86_arch_string;
4105 const char *orig_tune_string = ix86_tune_string;
4106 const char *orig_fpmath_string = ix86_fpmath_string;
4107 int orig_tune_defaulted = ix86_tune_defaulted;
4108 int orig_arch_specified = ix86_arch_specified;
4109 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4112 struct cl_target_option *def
4113 = TREE_TARGET_OPTION (target_option_default_node);
4115 /* Process each of the options on the chain. */
4116 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4119 /* If the changed options are different from the default, rerun
4120 ix86_option_override_internal, and then save the options away.
4121 The string options are attribute options, and will be undone
4122 when we copy the save structure. */
4123 if (ix86_isa_flags != def->x_ix86_isa_flags
4124 || target_flags != def->x_target_flags
4125 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4126 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4127 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4129 /* If we are using the default tune= or arch=, undo the string assigned,
4130 and use the default. */
4131 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4132 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4133 else if (!orig_arch_specified)
4134 ix86_arch_string = NULL;
4136 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4137 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4138 else if (orig_tune_defaulted)
4139 ix86_tune_string = NULL;
4141 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4142 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4143 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4144 else if (!TARGET_64BIT && TARGET_SSE)
4145 ix86_fpmath_string = "sse,387";
4147 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4148 ix86_option_override_internal (false);
4150 /* Add any builtin functions with the new isa if any. */
4151 ix86_add_new_builtins (ix86_isa_flags);
4153 /* Save the current options unless we are validating options for
4155 t = build_target_option_node ();
4157 ix86_arch_string = orig_arch_string;
4158 ix86_tune_string = orig_tune_string;
4159 ix86_fpmath_string = orig_fpmath_string;
4161 /* Free up memory allocated to hold the strings. */
4162 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4163 if (option_strings[i])
4164 free (option_strings[i]);
4170 /* Hook to validate attribute((target("string"))). */
4173 ix86_valid_target_attribute_p (tree fndecl,
4174 tree ARG_UNUSED (name),
4176 int ARG_UNUSED (flags))
4178 struct cl_target_option cur_target;
4180 tree old_optimize = build_optimization_node ();
4181 tree new_target, new_optimize;
4182 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4184 /* If the function changed the optimization levels as well as setting target
4185 options, start with the optimizations specified. */
4186 if (func_optimize && func_optimize != old_optimize)
4187 cl_optimization_restore (&global_options,
4188 TREE_OPTIMIZATION (func_optimize));
4190 /* The target attributes may also change some optimization flags, so update
4191 the optimization options if necessary. */
4192 cl_target_option_save (&cur_target, &global_options);
4193 new_target = ix86_valid_target_attribute_tree (args);
4194 new_optimize = build_optimization_node ();
4201 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4203 if (old_optimize != new_optimize)
4204 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4207 cl_target_option_restore (&global_options, &cur_target);
4209 if (old_optimize != new_optimize)
4210 cl_optimization_restore (&global_options,
4211 TREE_OPTIMIZATION (old_optimize));
4217 /* Hook to determine if one function can safely inline another. */
4220 ix86_can_inline_p (tree caller, tree callee)
4223 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4224 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4226 /* If callee has no option attributes, then it is ok to inline. */
4230 /* If caller has no option attributes, but callee does then it is not ok to
4232 else if (!caller_tree)
4237 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4238 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4240 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4241 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4243 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4244 != callee_opts->x_ix86_isa_flags)
4247 /* See if we have the same non-isa options. */
4248 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4251 /* See if arch, tune, etc. are the same. */
4252 else if (caller_opts->arch != callee_opts->arch)
4255 else if (caller_opts->tune != callee_opts->tune)
4258 else if (caller_opts->fpmath != callee_opts->fpmath)
4261 else if (caller_opts->branch_cost != callee_opts->branch_cost)
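/* Worked example (illustrative) of the ISA subset rule above:

     __attribute__((target ("sse2")))   static int f (void) { return 1; }
     __attribute__((target ("sse4.2"))) static int g (void) { return f (); }

   g's ISA flags are a superset of f's, so inlining f into g is allowed;
   inlining in the other direction fails the subset test and is refused. */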
4272 /* Remember the last target of ix86_set_current_function. */
4273 static GTY(()) tree ix86_previous_fndecl;
4275 /* Establish appropriate back-end context for processing the function
4276 FNDECL. The argument might be NULL to indicate processing at top
4277 level, outside of any function scope. */
4279 ix86_set_current_function (tree fndecl)
4281 /* Only change the context if the function changes. This hook is called
4282 several times in the course of compiling a function, and we don't want to
4283 slow things down too much or call target_reinit when it isn't safe. */
4284 if (fndecl && fndecl != ix86_previous_fndecl)
4286 tree old_tree = (ix86_previous_fndecl
4287 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4290 tree new_tree = (fndecl
4291 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4294 ix86_previous_fndecl = fndecl;
4295 if (old_tree == new_tree)
4300 cl_target_option_restore (&global_options,
4301 TREE_TARGET_OPTION (new_tree));
4307 struct cl_target_option *def
4308 = TREE_TARGET_OPTION (target_option_current_node);
4310 cl_target_option_restore (&global_options, def);
4317 /* Return true if this goes in large data/bss. */
4320 ix86_in_large_data_p (tree exp)
4322 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4325 /* Functions are never large data. */
4326 if (TREE_CODE (exp) == FUNCTION_DECL)
4329 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4331 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4332 if (strcmp (section, ".ldata") == 0
4333 || strcmp (section, ".lbss") == 0)
4339 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4341 /* If this is an incomplete type with size 0, then we can't put it
4342 in data because it might be too big when completed. */
4343 if (!size || size > ix86_section_threshold)
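/* Illustrative example: with -mcmodel=medium and the default large-data
   threshold (65536 bytes), a definition such as

     static char big_buf[1 << 20];

   exceeds the threshold and is placed in the large .ldata/.lbss sections,
   while small objects stay in the ordinary sections. */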
4350 /* Switch to the appropriate section for output of DECL.
4351 DECL is either a `VAR_DECL' node or a constant of some sort.
4352 RELOC indicates whether forming the initial value of DECL requires
4353 link-time relocations. */
4355 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4359 x86_64_elf_select_section (tree decl, int reloc,
4360 unsigned HOST_WIDE_INT align)
4362 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4363 && ix86_in_large_data_p (decl))
4365 const char *sname = NULL;
4366 unsigned int flags = SECTION_WRITE;
4367 switch (categorize_decl_for_section (decl, reloc))
4372 case SECCAT_DATA_REL:
4373 sname = ".ldata.rel";
4375 case SECCAT_DATA_REL_LOCAL:
4376 sname = ".ldata.rel.local";
4378 case SECCAT_DATA_REL_RO:
4379 sname = ".ldata.rel.ro";
4381 case SECCAT_DATA_REL_RO_LOCAL:
4382 sname = ".ldata.rel.ro.local";
4386 flags |= SECTION_BSS;
4389 case SECCAT_RODATA_MERGE_STR:
4390 case SECCAT_RODATA_MERGE_STR_INIT:
4391 case SECCAT_RODATA_MERGE_CONST:
4395 case SECCAT_SRODATA:
4402 /* We don't split these for medium model. Place them into
4403 default sections and hope for the best. */
4408 /* We might get called with string constants, but get_named_section
4409 doesn't like them as they are not DECLs. Also, we need to set
4410 flags in that case. */
4412 return get_section (sname, flags, NULL);
4413 return get_named_section (decl, sname, reloc);
4416 return default_elf_select_section (decl, reloc, align);
4419 /* Build up a unique section name, expressed as a
4420 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4421 RELOC indicates whether the initial value of EXP requires
4422 link-time relocations. */
4424 static void ATTRIBUTE_UNUSED
4425 x86_64_elf_unique_section (tree decl, int reloc)
4427 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4428 && ix86_in_large_data_p (decl))
4430 const char *prefix = NULL;
4431 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4432 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4434 switch (categorize_decl_for_section (decl, reloc))
4437 case SECCAT_DATA_REL:
4438 case SECCAT_DATA_REL_LOCAL:
4439 case SECCAT_DATA_REL_RO:
4440 case SECCAT_DATA_REL_RO_LOCAL:
4441 prefix = one_only ? ".ld" : ".ldata";
4444 prefix = one_only ? ".lb" : ".lbss";
4447 case SECCAT_RODATA_MERGE_STR:
4448 case SECCAT_RODATA_MERGE_STR_INIT:
4449 case SECCAT_RODATA_MERGE_CONST:
4450 prefix = one_only ? ".lr" : ".lrodata";
4452 case SECCAT_SRODATA:
4459 /* We don't split these for medium model. Place them into
4460 default sections and hope for the best. */
4465 const char *name, *linkonce;
4468 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4469 name = targetm.strip_name_encoding (name);
4471 /* If we're using one_only, then there needs to be a .gnu.linkonce
4472 prefix to the section name. */
4473 linkonce = one_only ? ".gnu.linkonce" : "";
4475 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4477 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4481 default_unique_section (decl, reloc);
4484 #ifdef COMMON_ASM_OP
4485 /* This says how to output assembler code to declare an
4486 uninitialized external linkage data object.
4488 For medium model x86-64 we need to use the .largecomm pseudo-op for large objects. */
4491 x86_elf_aligned_common (FILE *file,
4492 const char *name, unsigned HOST_WIDE_INT size,
4495 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4496 && size > (unsigned int)ix86_section_threshold)
4497 fputs (".largecomm\t", file);
4499 fputs (COMMON_ASM_OP, file);
4500 assemble_name (file, name);
4501 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4502 size, align / BITS_PER_UNIT);
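/* Sketch of the resulting assembly (illustrative): for a medium-model
   object larger than the threshold, the code above emits something like

     .largecomm big_buf,1048576,32

   whereas ordinary objects get the usual ".comm name,size,align". */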
4506 /* Utility function for targets to use in implementing
4507 ASM_OUTPUT_ALIGNED_BSS. */
4510 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4511 const char *name, unsigned HOST_WIDE_INT size,
4514 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4515 && size > (unsigned int)ix86_section_threshold)
4516 switch_to_section (get_named_section (decl, ".lbss", 0));
4518 switch_to_section (bss_section);
4519 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4520 #ifdef ASM_DECLARE_OBJECT_NAME
4521 last_assemble_variable_decl = decl;
4522 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4524 /* The standard thing is to just output a label for the object. */
4525 ASM_OUTPUT_LABEL (file, name);
4526 #endif /* ASM_DECLARE_OBJECT_NAME */
4527 ASM_OUTPUT_SKIP (file, size ? size : 1);
4531 ix86_option_optimization (int level, int size ATTRIBUTE_UNUSED)
4533 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4534 make the problem of having too few registers even worse. */
4535 #ifdef INSN_SCHEDULING
4537 flag_schedule_insns = 0;
4540 /* The default values of these switches depend on TARGET_64BIT,
4541 which is not known at this moment. Mark these values with 2 and
4542 let the user override them. In case there is no command line
4543 option specifying them, we will set the defaults in
4544 ix86_option_override_internal. */
4546 flag_omit_frame_pointer = 2;
4548 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4552 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4553 SUBTARGET_OPTIMIZATION_OPTIONS;
4557 /* Implement TARGET_OPTION_INIT_STRUCT. */
4560 ix86_option_init_struct (struct gcc_options *opts)
4563 /* The Darwin libraries never set errno, so we might as well
4564 avoid calling them when that's the only reason we would. */
4565 opts->x_flag_errno_math = 0;
4567 opts->x_flag_pcc_struct_return = 2;
4568 opts->x_flag_asynchronous_unwind_tables = 2;
4569 opts->x_flag_vect_cost_model = 1;
4572 /* Decide whether we must probe the stack before any space allocation
4573 on this target. It's essentially TARGET_STACK_PROBE except when
4574 -fstack-check causes the stack to be already probed differently. */
4577 ix86_target_stack_probe (void)
4579 /* Do not probe the stack twice if static stack checking is enabled. */
4580 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4583 return TARGET_STACK_PROBE;
4586 /* Decide whether we can make a sibling call to a function. DECL is the
4587 declaration of the function being targeted by the call and EXP is the
4588 CALL_EXPR representing the call. */
4591 ix86_function_ok_for_sibcall (tree decl, tree exp)
4593 tree type, decl_or_type;
4596 /* If we are generating position-independent code, we cannot sibcall
4597 optimize any indirect call, or a direct call to a global function,
4598 as the PLT requires %ebx be live. */
4599 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4602 /* If we need to align the outgoing stack, then sibcalling would
4603 unalign the stack, which may break the called function. */
4604 if (ix86_minimum_incoming_stack_boundary (true)
4605 < PREFERRED_STACK_BOUNDARY)
4610 decl_or_type = decl;
4611 type = TREE_TYPE (decl);
4615 /* We're looking at the CALL_EXPR, we need the type of the function. */
4616 type = CALL_EXPR_FN (exp); /* pointer expression */
4617 type = TREE_TYPE (type); /* pointer type */
4618 type = TREE_TYPE (type); /* function type */
4619 decl_or_type = type;
4622 /* Check that the return value locations are the same. Like
4623 if we are returning floats on the 80387 register stack, we cannot
4624 make a sibcall from a function that doesn't return a float to a
4625 function that does or, conversely, from a function that does return
4626 a float to a function that doesn't; the necessary stack adjustment
4627 would not be executed. This is also the place we notice
4628 differences in the return value ABI. Note that it is ok for one
4629 of the functions to have void return type as long as the return
4630 value of the other is passed in a register. */
4631 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4632 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4634 if (STACK_REG_P (a) || STACK_REG_P (b))
4636 if (!rtx_equal_p (a, b))
4639 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4641 else if (!rtx_equal_p (a, b))
4646 /* The SYSV ABI has more call-clobbered registers;
4647 disallow sibcalls from MS to SYSV. */
4648 if (cfun->machine->call_abi == MS_ABI
4649 && ix86_function_type_abi (type) == SYSV_ABI)
4654 /* If this call is indirect, we'll need to be able to use a
4655 call-clobbered register for the address of the target function.
4656 Make sure that all such registers are not used for passing
4657 parameters. Note that DLLIMPORT functions are indirect. */
4659 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4661 if (ix86_function_regparm (type, NULL) >= 3)
4663 /* ??? Need to count the actual number of registers to be used,
4664 not the possible number of registers. Fix later. */
4670 /* Otherwise okay. That also includes certain types of indirect calls. */
4674 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4675 and "sseregparm" calling convention attributes;
4676 arguments as in struct attribute_spec.handler. */
4679 ix86_handle_cconv_attribute (tree *node, tree name,
4681 int flags ATTRIBUTE_UNUSED,
4684 if (TREE_CODE (*node) != FUNCTION_TYPE
4685 && TREE_CODE (*node) != METHOD_TYPE
4686 && TREE_CODE (*node) != FIELD_DECL
4687 && TREE_CODE (*node) != TYPE_DECL)
4689 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4691 *no_add_attrs = true;
4695 /* Can combine regparm with all attributes but fastcall. */
4696 if (is_attribute_p ("regparm", name))
4700 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4702 error ("fastcall and regparm attributes are not compatible");
4705 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4707 error ("regparam and thiscall attributes are not compatible");
4710 cst = TREE_VALUE (args);
4711 if (TREE_CODE (cst) != INTEGER_CST)
4713 warning (OPT_Wattributes,
4714 "%qE attribute requires an integer constant argument",
4716 *no_add_attrs = true;
4718 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4720 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4722 *no_add_attrs = true;
4730 /* Do not warn when emulating the MS ABI. */
4731 if ((TREE_CODE (*node) != FUNCTION_TYPE
4732 && TREE_CODE (*node) != METHOD_TYPE)
4733 || ix86_function_type_abi (*node) != MS_ABI)
4734 warning (OPT_Wattributes, "%qE attribute ignored",
4736 *no_add_attrs = true;
4740 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4741 if (is_attribute_p ("fastcall", name))
4743 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4745 error ("fastcall and cdecl attributes are not compatible");
4747 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4749 error ("fastcall and stdcall attributes are not compatible");
4751 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4753 error ("fastcall and regparm attributes are not compatible");
4755 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4757 error ("fastcall and thiscall attributes are not compatible");
4761 /* Can combine stdcall with fastcall (redundant), regparm and
4763 else if (is_attribute_p ("stdcall", name))
4765 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4767 error ("stdcall and cdecl attributes are not compatible");
4769 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4771 error ("stdcall and fastcall attributes are not compatible");
4773 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4775 error ("stdcall and thiscall attributes are not compatible");
4779 /* Can combine cdecl with regparm and sseregparm. */
4780 else if (is_attribute_p ("cdecl", name))
4782 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4784 error ("stdcall and cdecl attributes are not compatible");
4786 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4788 error ("fastcall and cdecl attributes are not compatible");
4790 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4792 error ("cdecl and thiscall attributes are not compatible");
4795 else if (is_attribute_p ("thiscall", name))
4797 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4798 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4800 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4802 error ("stdcall and thiscall attributes are not compatible");
4804 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4806 error ("fastcall and thiscall attributes are not compatible");
4808 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4810 error ("cdecl and thiscall attributes are not compatible");
4814 /* Can combine sseregparm with all attributes. */
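/* Usage sketch (illustrative): the pairwise checks above reject mixes of
   incompatible conventions while allowing the harmless ones, e.g.

     void __attribute__((fastcall, cdecl)) f (int);
     void __attribute__((stdcall, sseregparm)) g (float);

   f is rejected (fastcall passes the first arguments in %ecx/%edx, which
   contradicts cdecl's pure stack passing), while g is accepted. */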
4819 /* Return 0 if the attributes for two types are incompatible, 1 if they
4820 are compatible, and 2 if they are nearly compatible (which causes a
4821 warning to be generated). */
4824 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4826 /* Check for mismatch of non-default calling convention. */
4827 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4829 if (TREE_CODE (type1) != FUNCTION_TYPE
4830 && TREE_CODE (type1) != METHOD_TYPE)
4833 /* Check for mismatched fastcall/regparm types. */
4834 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4835 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4836 || (ix86_function_regparm (type1, NULL)
4837 != ix86_function_regparm (type2, NULL)))
4840 /* Check for mismatched sseregparm types. */
4841 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4842 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4845 /* Check for mismatched thiscall types. */
4846 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4847 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4850 /* Check for mismatched return types (cdecl vs stdcall). */
4851 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4852 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4858 /* Return the regparm value for a function with the indicated TYPE and DECL.
4859 DECL may be NULL when calling function indirectly
4860 or considering a libcall. */
4863 ix86_function_regparm (const_tree type, const_tree decl)
4869 return (ix86_function_type_abi (type) == SYSV_ABI
4870 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4872 regparm = ix86_regparm;
4873 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4876 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4880 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4883 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4886 /* Use register calling convention for local functions when possible. */
4888 && TREE_CODE (decl) == FUNCTION_DECL
4890 && !(profile_flag && !flag_fentry))
4892 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4893 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4896 int local_regparm, globals = 0, regno;
4898 /* Make sure no regparm register is taken by a
4899 fixed register variable. */
4900 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4901 if (fixed_regs[local_regparm])
4904 /* We don't want to use regparm(3) for nested functions as
4905 these use a static chain pointer in the third argument. */
4906 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4909 /* In 32-bit mode save a register for the split stack. */
4910 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
4913 /* Each fixed register usage increases register pressure,
4914 so fewer registers should be used for argument passing.
4915 This functionality can be overridden by an explicit regparm value. */
4917 for (regno = 0; regno <= DI_REG; regno++)
4918 if (fixed_regs[regno])
4922 = globals < local_regparm ? local_regparm - globals : 0;
4924 if (local_regparm > regparm)
4925 regparm = local_regparm;
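/* Worked example (illustrative) of the local-function heuristic above:

     static int helper (int a, int b, int c) { return a + b + c; }

   When helper is local to the unit, not nested, and no regparm registers
   are fixed, -m32 -O2 may promote it to the equivalent of regparm(3),
   passing a, b and c in %eax, %edx and %ecx instead of on the stack. */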
4932 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4933 DFmode (2) arguments in SSE registers for a function with the
4934 indicated TYPE and DECL. DECL may be NULL when calling function
4935 indirectly or considering a libcall. Otherwise return 0. */
4938 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4940 gcc_assert (!TARGET_64BIT);
4942 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4943 by the sseregparm attribute. */
4944 if (TARGET_SSEREGPARM
4945 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4952 error ("calling %qD with attribute sseregparm without "
4953 "SSE/SSE2 enabled", decl);
4955 error ("calling %qT with attribute sseregparm without "
4956 "SSE/SSE2 enabled", type);
4964 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4965 (and DFmode for SSE2) arguments in SSE registers. */
4966 if (decl && TARGET_SSE_MATH && optimize
4967 && !(profile_flag && !flag_fentry))
4969 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4970 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4972 return TARGET_SSE2 ? 2 : 1;
4978 /* Return true if EAX is live at the start of the function. Used by
4979 ix86_expand_prologue to determine if we need special help before
4980 calling allocate_stack_worker. */
4983 ix86_eax_live_at_start_p (void)
4985 /* Cheat. Don't bother working forward from ix86_function_regparm
4986 to the function type to whether an actual argument is located in
4987 eax. Instead just look at cfg info, which is still close enough
4988 to correct at this point. This gives false positives for broken
4989 functions that might use uninitialized data that happens to be
4990 allocated in eax, but who cares? */
4991 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4994 /* Value is the number of bytes of arguments automatically
4995 popped when returning from a subroutine call.
4996 FUNDECL is the declaration node of the function (as a tree),
4997 FUNTYPE is the data type of the function (as a tree),
4998 or for a library call it is an identifier node for the subroutine name.
4999 SIZE is the number of bytes of arguments passed on the stack.
5001 On the 80386, the RTD insn may be used to pop them if the number
5002 of args is fixed, but if the number is variable then the caller
5003 must pop them all. RTD can't be used for library calls now
5004 because the library is compiled with the Unix compiler.
5005 Use of RTD is a selectable option, since it is incompatible with
5006 standard Unix calling sequences. If the option is not selected,
5007 the caller must always pop the args.
5009 The attribute stdcall is equivalent to RTD on a per module basis. */
5012 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5016 /* None of the 64-bit ABIs pop arguments. */
5020 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
5022 /* Cdecl functions override -mrtd, and never pop the stack. */
5023 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5025 /* Stdcall and fastcall functions will pop the stack if not
5027 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5028 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5029 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5032 if (rtd && ! stdarg_p (funtype))
5036 /* Lose any fake structure return argument if it is passed on the stack. */
5037 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5038 && !KEEP_AGGREGATE_RETURN_POINTER)
5040 int nregs = ix86_function_regparm (funtype, fundecl);
5042 return GET_MODE_SIZE (Pmode);
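/* Illustrative example of the pop rules above, with -m32:

     int __attribute__((stdcall)) f (int a, int b);
     int g (int a, int b);

   For f this function returns 8 (the callee pops its two 4-byte stack
   arguments, i.e. "ret $8"); for the default-cdecl g it returns 0 and the
   caller pops. */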
5048 /* Argument support functions. */
5050 /* Return true when register may be used to pass function parameters. */
5052 ix86_function_arg_regno_p (int regno)
5055 const int *parm_regs;
5060 return (regno < REGPARM_MAX
5061 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5063 return (regno < REGPARM_MAX
5064 || (TARGET_MMX && MMX_REGNO_P (regno)
5065 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5066 || (TARGET_SSE && SSE_REGNO_P (regno)
5067 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5072 if (SSE_REGNO_P (regno) && TARGET_SSE)
5077 if (TARGET_SSE && SSE_REGNO_P (regno)
5078 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5082 /* TODO: The function should depend on current function ABI but
5083 builtins.c would need updating then. Therefore we use the default ABI. */
5086 /* RAX is used as hidden argument to va_arg functions. */
5087 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5090 if (ix86_abi == MS_ABI)
5091 parm_regs = x86_64_ms_abi_int_parameter_registers;
5093 parm_regs = x86_64_int_parameter_registers;
5094 for (i = 0; i < (ix86_abi == MS_ABI
5095 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5096 if (regno == parm_regs[i])
5101 /* Return true if we do not know how to pass TYPE solely in registers. */
5104 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5106 if (must_pass_in_stack_var_size_or_pad (mode, type))
5109 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5110 The layout_type routine is crafty and tries to trick us into passing
5111 currently unsupported vector types on the stack by using TImode. */
5112 return (!TARGET_64BIT && mode == TImode
5113 && type && TREE_CODE (type) != VECTOR_TYPE);
5116 /* Return the size, in bytes, of the area reserved for arguments passed
5117 in registers for the function represented by FNDECL, depending on the ABI used. */
5120 ix86_reg_parm_stack_space (const_tree fndecl)
5122 enum calling_abi call_abi = SYSV_ABI;
5123 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5124 call_abi = ix86_function_abi (fndecl);
5126 call_abi = ix86_function_type_abi (fndecl);
5127 if (call_abi == MS_ABI)
5132 /* Returns SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call ABI used. */
5135 ix86_function_type_abi (const_tree fntype)
5137 if (TARGET_64BIT && fntype != NULL)
5139 enum calling_abi abi = ix86_abi;
5140 if (abi == SYSV_ABI)
5142 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5145 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5153 ix86_function_ms_hook_prologue (const_tree fn)
5155 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5157 if (decl_function_context (fn) != NULL_TREE)
5158 error_at (DECL_SOURCE_LOCATION (fn),
5159 "ms_hook_prologue is not compatible with nested function");
5166 static enum calling_abi
5167 ix86_function_abi (const_tree fndecl)
5171 return ix86_function_type_abi (TREE_TYPE (fndecl));
5174 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI used. */
5177 ix86_cfun_abi (void)
5179 if (! cfun || ! TARGET_64BIT)
5181 return cfun->machine->call_abi;
5184 /* Write the extra assembler code needed to declare a function properly. */
5187 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5190 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5194 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5195 unsigned int filler_cc = 0xcccccccc;
5197 for (i = 0; i < filler_count; i += 4)
5198 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5201 ASM_OUTPUT_LABEL (asm_out_file, fname);
5203 /* Output magic byte marker, if hot-patch attribute is set. */
5208 /* leaq [%rsp + 0], %rsp */
5209 asm_fprintf (asm_out_file, ASM_BYTE
5210 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5214 /* movl.s %edi, %edi
5216 movl.s %esp, %ebp */
5217 asm_fprintf (asm_out_file, ASM_BYTE
5218 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5224 extern void init_regs (void);
5226 /* Implementation of the call ABI switching target hook. The call
5227 register sets specific to FNDECL are set up here. See also
5228 CONDITIONAL_REGISTER_USAGE for more details. */
5230 ix86_call_abi_override (const_tree fndecl)
5232 if (fndecl == NULL_TREE)
5233 cfun->machine->call_abi = ix86_abi;
5235 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5238 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid
5239 expensive re-initialization via init_regs each time we switch function
5240 context, since it is needed only during RTL expansion. */
5242 ix86_maybe_switch_abi (void)
5245 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5249 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5250 for a call to a function whose data type is FNTYPE.
5251 For a library call, FNTYPE is 0. */
5254 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5255 tree fntype, /* tree ptr for function decl */
5256 rtx libname, /* SYMBOL_REF of library name or 0 */
5259 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5260 memset (cum, 0, sizeof (*cum));
5263 cum->call_abi = ix86_function_abi (fndecl);
5265 cum->call_abi = ix86_function_type_abi (fntype);
5266 /* Set up the number of registers to use for passing arguments. */
5268 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5269 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5270 "or subtarget optimization implying it");
5271 cum->nregs = ix86_regparm;
5274 cum->nregs = (cum->call_abi == SYSV_ABI
5275 ? X86_64_REGPARM_MAX
5276 : X86_64_MS_REGPARM_MAX);
5280 cum->sse_nregs = SSE_REGPARM_MAX;
5283 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5284 ? X86_64_SSE_REGPARM_MAX
5285 : X86_64_MS_SSE_REGPARM_MAX);
5289 cum->mmx_nregs = MMX_REGPARM_MAX;
5290 cum->warn_avx = true;
5291 cum->warn_sse = true;
5292 cum->warn_mmx = true;
5294 /* Because types might mismatch between caller and callee, we need to
5295 use the actual type of the function for local calls.
5296 FIXME: cgraph_analyze can be told to actually record if function uses
5297 va_start so for local functions maybe_vaarg can be made aggressive, helping K&R code.
5299 FIXME: once the type system is fixed, we won't need this code anymore. */
5301 fntype = TREE_TYPE (fndecl);
5302 cum->maybe_vaarg = (fntype
5303 ? (!prototype_p (fntype) || stdarg_p (fntype))
5308 /* If there are variable arguments, then we won't pass anything
5309 in registers in 32-bit mode. */
5310 if (stdarg_p (fntype))
5321 /* Use ecx and edx registers if function has fastcall attribute,
5322 else look for regparm information. */
5325 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5328 cum->fastcall = 1; /* Same first register as in fastcall. */
5330 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5336 cum->nregs = ix86_function_regparm (fntype, fndecl);
5339 /* Set up the number of SSE registers used for passing SFmode
5340 and DFmode arguments. Warn for mismatching ABI. */
5341 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5345 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5346 But in the case of vector types, it is some vector mode.
5348 When we have only some of our vector isa extensions enabled, then there
5349 are some modes for which vector_mode_supported_p is false. For these
5350 modes, the generic vector support in gcc will choose some non-vector mode
5351 in order to implement the type. By computing the natural mode, we'll
5352 select the proper ABI location for the operand and not depend on whatever
5353 the middle-end decides to do with these vector types.
5355 The middle-end can't deal with vector types > 16 bytes. In this
5356 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5359 static enum machine_mode
5360 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5362 enum machine_mode mode = TYPE_MODE (type);
5364 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5366 HOST_WIDE_INT size = int_size_in_bytes (type);
5367 if ((size == 8 || size == 16 || size == 32)
5368 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5369 && TYPE_VECTOR_SUBPARTS (type) > 1)
5371 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5373 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5374 mode = MIN_MODE_VECTOR_FLOAT;
5376 mode = MIN_MODE_VECTOR_INT;
5378 /* Get the mode which has this inner mode and number of units. */
5379 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5380 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5381 && GET_MODE_INNER (mode) == innermode)
5383 if (size == 32 && !TARGET_AVX)
5385 static bool warnedavx;
5392 warning (0, "AVX vector argument without AVX "
5393 "enabled changes the ABI");
5395 return TYPE_MODE (type);
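/* Illustrative trigger for the warning above, assuming the __m256 type
   from <immintrin.h>:

     __m256 sum (__m256 a, __m256 b);

   Compiled without -mavx there is no register class for the 32-byte
   vector, so the argument-passing ABI differs from an AVX-enabled build
   and the one-time warning fires. */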
5408 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5409 this may not agree with the mode that the type system has chosen for the
5410 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5411 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5414 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5419 if (orig_mode != BLKmode)
5420 tmp = gen_rtx_REG (orig_mode, regno);
5423 tmp = gen_rtx_REG (mode, regno);
5424 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5425 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5431 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5432 of this code is to classify each 8-byte chunk of the incoming argument by register
5433 class and assign registers accordingly. */
5435 /* Return the union class of CLASS1 and CLASS2.
5436 See the x86-64 PS ABI for details. */
5438 static enum x86_64_reg_class
5439 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5441 /* Rule #1: If both classes are equal, this is the resulting class. */
5442 if (class1 == class2)
5445 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5447 if (class1 == X86_64_NO_CLASS)
5449 if (class2 == X86_64_NO_CLASS)
5452 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5453 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5454 return X86_64_MEMORY_CLASS;
5456 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5457 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5458 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5459 return X86_64_INTEGERSI_CLASS;
5460 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5461 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5462 return X86_64_INTEGER_CLASS;
5464 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5466 if (class1 == X86_64_X87_CLASS
5467 || class1 == X86_64_X87UP_CLASS
5468 || class1 == X86_64_COMPLEX_X87_CLASS
5469 || class2 == X86_64_X87_CLASS
5470 || class2 == X86_64_X87UP_CLASS
5471 || class2 == X86_64_COMPLEX_X87_CLASS)
5472 return X86_64_MEMORY_CLASS;
5474 /* Rule #6: Otherwise class SSE is used. */
5475 return X86_64_SSE_CLASS;
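/* Worked example (illustrative) of the merging rules above:

     struct s { double d; int i; };

   occupies two 8-byte words.  The first holds only the double and
   classifies as X86_64_SSE_CLASS (passed in %xmm0); the second holds only
   the int and classifies as X86_64_INTEGERSI_CLASS (passed in %edi).
   Rule #4 would force INTEGER if an int shared an 8-byte word with a
   float. */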
5478 /* Classify the argument of type TYPE and mode MODE.
5479 CLASSES will be filled by the register class used to pass each word
5480 of the operand. The number of words is returned. In case the parameter
5481 should be passed in memory, 0 is returned. As a special case for zero
5482 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5484 BIT_OFFSET is used internally for handling records and specifies the
5485 offset in bits modulo 256 to avoid overflow cases.
5487 See the x86-64 PS ABI for details.
5491 classify_argument (enum machine_mode mode, const_tree type,
5492 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5494 HOST_WIDE_INT bytes =
5495 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5496 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5498 /* Variable sized entities are always passed/returned in memory. */
5502 if (mode != VOIDmode
5503 && targetm.calls.must_pass_in_stack (mode, type))
5506 if (type && AGGREGATE_TYPE_P (type))
5510 enum x86_64_reg_class subclasses[MAX_CLASSES];
5512 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5516 for (i = 0; i < words; i++)
5517 classes[i] = X86_64_NO_CLASS;
5519 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5520 signal memory class, so handle it as a special case. */
5523 classes[0] = X86_64_NO_CLASS;
5527 /* Classify each field of record and merge classes. */
5528 switch (TREE_CODE (type))
5531	  /* And now merge the fields of the structure.  */
5532 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5534 if (TREE_CODE (field) == FIELD_DECL)
5538 if (TREE_TYPE (field) == error_mark_node)
5541 /* Bitfields are always classified as integer. Handle them
5542 early, since later code would consider them to be
5543 misaligned integers. */
5544 if (DECL_BIT_FIELD (field))
5546 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5547 i < ((int_bit_position (field) + (bit_offset % 64))
5548 + tree_low_cst (DECL_SIZE (field), 0)
5551 merge_classes (X86_64_INTEGER_CLASS,
5558 type = TREE_TYPE (field);
5560		    /* A flexible array member is ignored.  */
5561 if (TYPE_MODE (type) == BLKmode
5562 && TREE_CODE (type) == ARRAY_TYPE
5563 && TYPE_SIZE (type) == NULL_TREE
5564 && TYPE_DOMAIN (type) != NULL_TREE
5565 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5570 if (!warned && warn_psabi)
5573 inform (input_location,
5574 "The ABI of passing struct with"
5575 " a flexible array member has"
5576 " changed in GCC 4.4");
5580 num = classify_argument (TYPE_MODE (type), type,
5582 (int_bit_position (field)
5583 + bit_offset) % 256);
5586 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5587 for (i = 0; i < num && (i + pos) < words; i++)
5589 merge_classes (subclasses[i], classes[i + pos]);
5596 /* Arrays are handled as small records. */
5599 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5600 TREE_TYPE (type), subclasses, bit_offset);
5604 /* The partial classes are now full classes. */
5605 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5606 subclasses[0] = X86_64_SSE_CLASS;
5607 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5608 && !((bit_offset % 64) == 0 && bytes == 4))
5609 subclasses[0] = X86_64_INTEGER_CLASS;
5611 for (i = 0; i < words; i++)
5612 classes[i] = subclasses[i % num];
5617 case QUAL_UNION_TYPE:
5618       /* Unions are similar to RECORD_TYPE but the offset is always 0.
5620 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5622 if (TREE_CODE (field) == FIELD_DECL)
5626 if (TREE_TYPE (field) == error_mark_node)
5629 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5630 TREE_TYPE (field), subclasses,
5634 for (i = 0; i < num; i++)
5635 classes[i] = merge_classes (subclasses[i], classes[i]);
5646	  /* When size > 16 bytes, if the first eightbyte isn't
5647	     X86_64_SSE_CLASS or any of the others isn't
5648	     X86_64_SSEUP_CLASS, everything should be passed in memory.  */
5650 if (classes[0] != X86_64_SSE_CLASS)
5653 for (i = 1; i < words; i++)
5654 if (classes[i] != X86_64_SSEUP_CLASS)
5658 /* Final merger cleanup. */
5659 for (i = 0; i < words; i++)
5661      /* If one class is MEMORY, everything should be passed in memory.  */
5663 if (classes[i] == X86_64_MEMORY_CLASS)
5666      /* The X86_64_SSEUP_CLASS should always be preceded by
5667         X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
5668 if (classes[i] == X86_64_SSEUP_CLASS
5669 && classes[i - 1] != X86_64_SSE_CLASS
5670 && classes[i - 1] != X86_64_SSEUP_CLASS)
5672 /* The first one should never be X86_64_SSEUP_CLASS. */
5673 gcc_assert (i != 0);
5674 classes[i] = X86_64_SSE_CLASS;
5677 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5678 everything should be passed in memory. */
5679 if (classes[i] == X86_64_X87UP_CLASS
5680 && (classes[i - 1] != X86_64_X87_CLASS))
5684 /* The first one should never be X86_64_X87UP_CLASS. */
5685 gcc_assert (i != 0);
5686 if (!warned && warn_psabi)
5689 inform (input_location,
5690 "The ABI of passing union with long double"
5691 " has changed in GCC 4.4");
5699  /* Compute the alignment needed.  We align all types to their natural
5700     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
5701 if (mode != VOIDmode && mode != BLKmode)
5703 int mode_alignment = GET_MODE_BITSIZE (mode);
5706 mode_alignment = 128;
5707 else if (mode == XCmode)
5708 mode_alignment = 256;
5709 if (COMPLEX_MODE_P (mode))
5710 mode_alignment /= 2;
5711 /* Misaligned fields are always returned in memory. */
5712 if (bit_offset % mode_alignment)
5716  /* For V1xx modes, just use the base mode.  */
5717 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5718 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5719 mode = GET_MODE_INNER (mode);
5721 /* Classification of atomic types. */
5726 classes[0] = X86_64_SSE_CLASS;
5729 classes[0] = X86_64_SSE_CLASS;
5730 classes[1] = X86_64_SSEUP_CLASS;
5740	int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5744 classes[0] = X86_64_INTEGERSI_CLASS;
5747 else if (size <= 64)
5749 classes[0] = X86_64_INTEGER_CLASS;
5752 else if (size <= 64+32)
5754 classes[0] = X86_64_INTEGER_CLASS;
5755 classes[1] = X86_64_INTEGERSI_CLASS;
5758 else if (size <= 64+64)
5760 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5768 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
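/* Editorial example for the size checks above: a 64-bit integer seen at
   bit_offset 32 has size = 32 + 64 = 96 <= 64+32, so classes[0] becomes
   X86_64_INTEGER_CLASS and classes[1] X86_64_INTEGERSI_CLASS, since only
   the low half of the second eightbyte is actually occupied.  */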
5772 /* OImode shouldn't be used directly. */
5777 if (!(bit_offset % 64))
5778 classes[0] = X86_64_SSESF_CLASS;
5780 classes[0] = X86_64_SSE_CLASS;
5783 classes[0] = X86_64_SSEDF_CLASS;
5786 classes[0] = X86_64_X87_CLASS;
5787 classes[1] = X86_64_X87UP_CLASS;
5790 classes[0] = X86_64_SSE_CLASS;
5791 classes[1] = X86_64_SSEUP_CLASS;
5794 classes[0] = X86_64_SSE_CLASS;
5795 if (!(bit_offset % 64))
5801 if (!warned && warn_psabi)
5804 inform (input_location,
5805 "The ABI of passing structure with complex float"
5806 " member has changed in GCC 4.4");
5808 classes[1] = X86_64_SSESF_CLASS;
5812 classes[0] = X86_64_SSEDF_CLASS;
5813 classes[1] = X86_64_SSEDF_CLASS;
5816 classes[0] = X86_64_COMPLEX_X87_CLASS;
5819      /* These modes are larger than 16 bytes.  */
5827 classes[0] = X86_64_SSE_CLASS;
5828 classes[1] = X86_64_SSEUP_CLASS;
5829 classes[2] = X86_64_SSEUP_CLASS;
5830 classes[3] = X86_64_SSEUP_CLASS;
5838 classes[0] = X86_64_SSE_CLASS;
5839 classes[1] = X86_64_SSEUP_CLASS;
5847 classes[0] = X86_64_SSE_CLASS;
5853 gcc_assert (VECTOR_MODE_P (mode));
5858 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5860 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5861 classes[0] = X86_64_INTEGERSI_CLASS;
5863 classes[0] = X86_64_INTEGER_CLASS;
5864 classes[1] = X86_64_INTEGER_CLASS;
5865 return 1 + (bytes > 8);
5869 /* Examine the argument and set the number of registers required in each
5870    class.  Return 0 iff the parameter should be passed in memory.  */
5872 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5873 int *int_nregs, int *sse_nregs)
5875 enum x86_64_reg_class regclass[MAX_CLASSES];
5876 int n = classify_argument (mode, type, regclass, 0);
5882 for (n--; n >= 0; n--)
5883 switch (regclass[n])
5885 case X86_64_INTEGER_CLASS:
5886 case X86_64_INTEGERSI_CLASS:
5889 case X86_64_SSE_CLASS:
5890 case X86_64_SSESF_CLASS:
5891 case X86_64_SSEDF_CLASS:
5894 case X86_64_NO_CLASS:
5895 case X86_64_SSEUP_CLASS:
5897 case X86_64_X87_CLASS:
5898 case X86_64_X87UP_CLASS:
5902 case X86_64_COMPLEX_X87_CLASS:
5903 return in_return ? 2 : 0;
5904 case X86_64_MEMORY_CLASS:
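/* Editorial example, inferred from the classification above: __int128
   (TImode) consists of two X86_64_INTEGER_CLASS eightbytes, so
   examine_argument reports *int_nregs = 2 and *sse_nregs = 0; a double
   reports *sse_nregs = 1 and *int_nregs = 0.  */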
5910 /* Construct a container for the argument used by the GCC interface.  See
5911    FUNCTION_ARG for the detailed description.  */
5914 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5915 const_tree type, int in_return, int nintregs, int nsseregs,
5916 const int *intreg, int sse_regno)
5918 /* The following variables hold the static issued_error state. */
5919 static bool issued_sse_arg_error;
5920 static bool issued_sse_ret_error;
5921 static bool issued_x87_ret_error;
5923 enum machine_mode tmpmode;
5925 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5926 enum x86_64_reg_class regclass[MAX_CLASSES];
5930 int needed_sseregs, needed_intregs;
5931 rtx exp[MAX_CLASSES];
5934 n = classify_argument (mode, type, regclass, 0);
5937 if (!examine_argument (mode, type, in_return, &needed_intregs,
5940 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5943 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5944 some less clueful developer tries to use floating-point anyway. */
5945 if (needed_sseregs && !TARGET_SSE)
5949 if (!issued_sse_ret_error)
5951 error ("SSE register return with SSE disabled");
5952 issued_sse_ret_error = true;
5955 else if (!issued_sse_arg_error)
5957 error ("SSE register argument with SSE disabled");
5958 issued_sse_arg_error = true;
5963 /* Likewise, error if the ABI requires us to return values in the
5964 x87 registers and the user specified -mno-80387. */
5965 if (!TARGET_80387 && in_return)
5966 for (i = 0; i < n; i++)
5967 if (regclass[i] == X86_64_X87_CLASS
5968 || regclass[i] == X86_64_X87UP_CLASS
5969 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5971 if (!issued_x87_ret_error)
5973 error ("x87 register return with x87 disabled");
5974 issued_x87_ret_error = true;
5979  /* First construct the simple cases.  Avoid SCmode, since we want to use a
5980     single register to pass this type.  */
5981 if (n == 1 && mode != SCmode)
5982 switch (regclass[0])
5984 case X86_64_INTEGER_CLASS:
5985 case X86_64_INTEGERSI_CLASS:
5986 return gen_rtx_REG (mode, intreg[0]);
5987 case X86_64_SSE_CLASS:
5988 case X86_64_SSESF_CLASS:
5989 case X86_64_SSEDF_CLASS:
5990 if (mode != BLKmode)
5991 return gen_reg_or_parallel (mode, orig_mode,
5992 SSE_REGNO (sse_regno));
5994 case X86_64_X87_CLASS:
5995 case X86_64_COMPLEX_X87_CLASS:
5996 return gen_rtx_REG (mode, FIRST_STACK_REG);
5997 case X86_64_NO_CLASS:
5998 /* Zero sized array, struct or class. */
6003 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
6004 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
6005 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6007 && regclass[0] == X86_64_SSE_CLASS
6008 && regclass[1] == X86_64_SSEUP_CLASS
6009 && regclass[2] == X86_64_SSEUP_CLASS
6010 && regclass[3] == X86_64_SSEUP_CLASS
6012 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
6015 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
6016 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6017 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
6018 && regclass[1] == X86_64_INTEGER_CLASS
6019 && (mode == CDImode || mode == TImode || mode == TFmode)
6020 && intreg[0] + 1 == intreg[1])
6021 return gen_rtx_REG (mode, intreg[0]);
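/* Editorial note: the two-register INTEGER case above covers e.g. an
   __int128 argument, which the psABI places in two consecutive GPRs such
   as %rdi:%rsi; because the entries of intreg are consecutive, a single
   TImode REG describes the pair without needing a PARALLEL.  */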
6023 /* Otherwise figure out the entries of the PARALLEL. */
6024 for (i = 0; i < n; i++)
6028 switch (regclass[i])
6030 case X86_64_NO_CLASS:
6032 case X86_64_INTEGER_CLASS:
6033 case X86_64_INTEGERSI_CLASS:
6034 /* Merge TImodes on aligned occasions here too. */
6035 if (i * 8 + 8 > bytes)
6036 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6037 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6041	  /* We've requested 24 bytes for which we don't have a mode.  Use DImode.  */
6042 if (tmpmode == BLKmode)
6044 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6045 gen_rtx_REG (tmpmode, *intreg),
6049 case X86_64_SSESF_CLASS:
6050 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6051 gen_rtx_REG (SFmode,
6052 SSE_REGNO (sse_regno)),
6056 case X86_64_SSEDF_CLASS:
6057 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6058 gen_rtx_REG (DFmode,
6059 SSE_REGNO (sse_regno)),
6063 case X86_64_SSE_CLASS:
6071 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6081 && regclass[1] == X86_64_SSEUP_CLASS
6082 && regclass[2] == X86_64_SSEUP_CLASS
6083 && regclass[3] == X86_64_SSEUP_CLASS);
6090 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6091 gen_rtx_REG (tmpmode,
6092 SSE_REGNO (sse_regno)),
6101 /* Empty aligned struct, union or class. */
6105 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6106 for (i = 0; i < nexps; i++)
6107 XVECEXP (ret, 0, i) = exp [i];
6111 /* Update the data in CUM to advance over an argument of mode MODE
6112 and data type TYPE. (TYPE is null for libcalls where that information
6113 may not be available.) */
6116 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6117 const_tree type, HOST_WIDE_INT bytes,
6118 HOST_WIDE_INT words)
6134 cum->words += words;
6135 cum->nregs -= words;
6136 cum->regno += words;
6138 if (cum->nregs <= 0)
6146 /* OImode shouldn't be used directly. */
6150 if (cum->float_in_sse < 2)
6153 if (cum->float_in_sse < 1)
6170 if (!type || !AGGREGATE_TYPE_P (type))
6172 cum->sse_words += words;
6173 cum->sse_nregs -= 1;
6174 cum->sse_regno += 1;
6175 if (cum->sse_nregs <= 0)
6189 if (!type || !AGGREGATE_TYPE_P (type))
6191 cum->mmx_words += words;
6192 cum->mmx_nregs -= 1;
6193 cum->mmx_regno += 1;
6194 if (cum->mmx_nregs <= 0)
6205 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6206 const_tree type, HOST_WIDE_INT words, bool named)
6208 int int_nregs, sse_nregs;
6210  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6211 if (!named && VALID_AVX256_REG_MODE (mode))
6214 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6215 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6217 cum->nregs -= int_nregs;
6218 cum->sse_nregs -= sse_nregs;
6219 cum->regno += int_nregs;
6220 cum->sse_regno += sse_nregs;
6224 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6225 cum->words = (cum->words + align - 1) & ~(align - 1);
6226 cum->words += words;
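/* Editorial example of the round-up above: with cum->words == 3 and a
   16-byte aligned argument (align == 2 words),

     (3 + 2 - 1) & ~(2 - 1) == 4

   so the argument starts on an even word index before WORDS is added.  */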
6231 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6232 HOST_WIDE_INT words)
6234  /* Otherwise, this should be passed indirectly.  */
6235 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6237 cum->words += words;
6245 /* Update the data in CUM to advance over an argument of mode MODE and
6246 data type TYPE. (TYPE is null for libcalls where that information
6247 may not be available.) */
6250 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6251 const_tree type, bool named)
6253 HOST_WIDE_INT bytes, words;
6255 if (mode == BLKmode)
6256 bytes = int_size_in_bytes (type);
6258 bytes = GET_MODE_SIZE (mode);
6259 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6262 mode = type_natural_mode (type, NULL);
6264 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6265 function_arg_advance_ms_64 (cum, bytes, words);
6266 else if (TARGET_64BIT)
6267 function_arg_advance_64 (cum, mode, type, words, named);
6269 function_arg_advance_32 (cum, mode, type, bytes, words);
6272 /* Define where to put the arguments to a function.
6273 Value is zero to push the argument on the stack,
6274 or a hard register in which to store the argument.
6276 MODE is the argument's machine mode.
6277 TYPE is the data type of the argument (as a tree).
6278    This is null for libcalls where that information may not be available.
6280 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6281 the preceding args and about the function being called.
6282 NAMED is nonzero if this argument is a named parameter
6283 (otherwise it is an extra parameter matching an ellipsis). */
6286 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6287 enum machine_mode orig_mode, const_tree type,
6288 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6290 static bool warnedsse, warnedmmx;
6292 /* Avoid the AL settings for the Unix64 ABI. */
6293 if (mode == VOIDmode)
6309 if (words <= cum->nregs)
6311 int regno = cum->regno;
6313	  /* Fastcall allocates the first two DWORD (SImode) or
6314	     smaller arguments to ECX and EDX if it isn't an aggregate.  */
6320 || (type && AGGREGATE_TYPE_P (type)))
6323	      /* ECX, not EAX, is the first allocated register.  */
6324 if (regno == AX_REG)
6327 return gen_rtx_REG (mode, regno);
6332 if (cum->float_in_sse < 2)
6335 if (cum->float_in_sse < 1)
6339 /* In 32bit, we pass TImode in xmm registers. */
6346 if (!type || !AGGREGATE_TYPE_P (type))
6348 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6351 warning (0, "SSE vector argument without SSE enabled "
6355 return gen_reg_or_parallel (mode, orig_mode,
6356 cum->sse_regno + FIRST_SSE_REG);
6361 /* OImode shouldn't be used directly. */
6370 if (!type || !AGGREGATE_TYPE_P (type))
6373 return gen_reg_or_parallel (mode, orig_mode,
6374 cum->sse_regno + FIRST_SSE_REG);
6384 if (!type || !AGGREGATE_TYPE_P (type))
6386 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6389 warning (0, "MMX vector argument without MMX enabled "
6393 return gen_reg_or_parallel (mode, orig_mode,
6394 cum->mmx_regno + FIRST_MMX_REG);
6403 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6404 enum machine_mode orig_mode, const_tree type, bool named)
6406  /* Handle a hidden AL argument containing the number of registers
6407     for varargs x86-64 functions.  */
6408 if (mode == VOIDmode)
6409 return GEN_INT (cum->maybe_vaarg
6410 ? (cum->sse_nregs < 0
6411 ? X86_64_SSE_REGPARM_MAX
6426  /* Unnamed 256bit vector mode parameters are passed on the stack.  */
6432 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6434 &x86_64_int_parameter_registers [cum->regno],
6439 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6440 enum machine_mode orig_mode, bool named,
6441 HOST_WIDE_INT bytes)
6445  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6446     We use the value -2 to specify that the current function call is MSABI.  */
6447 if (mode == VOIDmode)
6448 return GEN_INT (-2);
6450 /* If we've run out of registers, it goes on the stack. */
6451 if (cum->nregs == 0)
6454 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6456 /* Only floating point modes are passed in anything but integer regs. */
6457 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6460 regno = cum->regno + FIRST_SSE_REG;
6465 /* Unnamed floating parameters are passed in both the
6466 SSE and integer registers. */
6467 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6468 t2 = gen_rtx_REG (mode, regno);
6469 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6470 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6471 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6474  /* Handle aggregate types passed in registers.  */
6475 if (orig_mode == BLKmode)
6477 if (bytes > 0 && bytes <= 8)
6478 mode = (bytes > 4 ? DImode : SImode);
6479 if (mode == BLKmode)
6483 return gen_reg_or_parallel (mode, orig_mode, regno);
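/* Editorial example: under the MS ABI an aggregate of 1..8 bytes arrives
   here in BLKmode and is retyped as SImode or DImode above, so e.g. an
   8-byte struct passed as the first argument travels by value in %rcx;
   larger aggregates never reach this point because ix86_pass_by_reference
   already forced them into memory behind a pointer.  */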
6486 /* Return where to put the arguments to a function.
6487 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6489 MODE is the argument's machine mode. TYPE is the data type of the
6490 argument. It is null for libcalls where that information may not be
6491 available. CUM gives information about the preceding args and about
6492 the function being called. NAMED is nonzero if this argument is a
6493 named parameter (otherwise it is an extra parameter matching an
6497 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6498 const_tree type, bool named)
6500 enum machine_mode mode = omode;
6501 HOST_WIDE_INT bytes, words;
6503 if (mode == BLKmode)
6504 bytes = int_size_in_bytes (type);
6506 bytes = GET_MODE_SIZE (mode);
6507 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6509 /* To simplify the code below, represent vector types with a vector mode
6510 even if MMX/SSE are not active. */
6511 if (type && TREE_CODE (type) == VECTOR_TYPE)
6512 mode = type_natural_mode (type, cum);
6514 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6515 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6516 else if (TARGET_64BIT)
6517 return function_arg_64 (cum, mode, omode, type, named);
6519 return function_arg_32 (cum, mode, omode, type, bytes, words);
6522 /* A C expression that indicates when an argument must be passed by
6523 reference. If nonzero for an argument, a copy of that argument is
6524 made in memory and a pointer to the argument is passed instead of
6525 the argument itself. The pointer is passed in whatever way is
6526 appropriate for passing a pointer to that type. */
6529 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6530 enum machine_mode mode ATTRIBUTE_UNUSED,
6531 const_tree type, bool named ATTRIBUTE_UNUSED)
6533 /* See Windows x64 Software Convention. */
6534 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6536 int msize = (int) GET_MODE_SIZE (mode);
6539 /* Arrays are passed by reference. */
6540 if (TREE_CODE (type) == ARRAY_TYPE)
6543 if (AGGREGATE_TYPE_P (type))
6545 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6546 are passed by reference. */
6547 msize = int_size_in_bytes (type);
6551 /* __m128 is passed by reference. */
6553 case 1: case 2: case 4: case 8:
6559 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
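/* Editorial summary of the MS ABI rule implemented above: only values of
   exactly 1, 2, 4 or 8 bytes are passed by value; arrays, __m128 and all
   other sizes are passed by reference, matching the Windows x64 software
   convention cited above.  */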
6565 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6568 contains_aligned_value_p (const_tree type)
6570 enum machine_mode mode = TYPE_MODE (type);
6571 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6575 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6577 if (TYPE_ALIGN (type) < 128)
6580 if (AGGREGATE_TYPE_P (type))
6582 /* Walk the aggregates recursively. */
6583 switch (TREE_CODE (type))
6587 case QUAL_UNION_TYPE:
6591 /* Walk all the structure fields. */
6592 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6594 if (TREE_CODE (field) == FIELD_DECL
6595 && contains_aligned_value_p (TREE_TYPE (field)))
6602      /* Just for use if some language passes arrays by value.  */
6603 if (contains_aligned_value_p (TREE_TYPE (type)))
6614 /* Gives the alignment boundary, in bits, of an argument with the
6615 specified mode and type. */
6618 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6623      /* Since the main variant type is used for the call, we convert the
6624         type to its main variant.  */
6625 type = TYPE_MAIN_VARIANT (type);
6626 align = TYPE_ALIGN (type);
6629 align = GET_MODE_ALIGNMENT (mode);
6630 if (align < PARM_BOUNDARY)
6631 align = PARM_BOUNDARY;
6632 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6633 natural boundaries. */
6634 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6636      /* The i386 ABI defines all arguments to be 4 byte aligned.  We have to
6637         make an exception for SSE modes since these require 128 bit alignment.
6640 The handling here differs from field_alignment. ICC aligns MMX
6641 arguments to 4 byte boundaries, while structure fields are aligned
6642 to 8 byte boundaries. */
6645 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6646 align = PARM_BOUNDARY;
6650 if (!contains_aligned_value_p (type))
6651 align = PARM_BOUNDARY;
6654 if (align > BIGGEST_ALIGNMENT)
6655 align = BIGGEST_ALIGNMENT;
6659 /* Return true if N is a possible register number for a function value.  */
6662 ix86_function_value_regno_p (const unsigned int regno)
6669 case FIRST_FLOAT_REG:
6670 /* TODO: The function should depend on current function ABI but
6671 builtins.c would need updating then. Therefore we use the
6673 if (TARGET_64BIT && ix86_abi == MS_ABI)
6675 return TARGET_FLOAT_RETURNS_IN_80387;
6681 if (TARGET_MACHO || TARGET_64BIT)
6689 /* Define how to find the value returned by a function.
6690 VALTYPE is the data type of the value (as a tree).
6691 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6692 otherwise, FUNC is 0. */
6695 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6696 const_tree fntype, const_tree fn)
6700 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6701 we normally prevent this case when mmx is not available. However
6702 some ABIs may require the result to be returned like DImode. */
6703 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6704 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6706 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6707 we prevent this case when sse is not available. However some ABIs
6708 may require the result to be returned like integer TImode. */
6709 else if (mode == TImode
6710 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6711 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6713 /* 32-byte vector modes in %ymm0. */
6714 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6715 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6717 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6718 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6719 regno = FIRST_FLOAT_REG;
6721 /* Most things go in %eax. */
6724 /* Override FP return register with %xmm0 for local functions when
6725 SSE math is enabled or for functions with sseregparm attribute. */
6726 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6728 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6729 if ((sse_level >= 1 && mode == SFmode)
6730 || (sse_level == 2 && mode == DFmode))
6731 regno = FIRST_SSE_REG;
6734 /* OImode shouldn't be used directly. */
6735 gcc_assert (mode != OImode);
6737 return gen_rtx_REG (orig_mode, regno);
6741 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6746 /* Handle libcalls, which don't provide a type node. */
6747 if (valtype == NULL)
6759 return gen_rtx_REG (mode, FIRST_SSE_REG);
6762 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6766 return gen_rtx_REG (mode, AX_REG);
6770 ret = construct_container (mode, orig_mode, valtype, 1,
6771 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6772 x86_64_int_return_registers, 0);
6774  /* For zero sized structures, construct_container returns NULL, but we
6775     need to keep the rest of the compiler happy by returning a meaningful value.  */
6777 ret = gen_rtx_REG (orig_mode, AX_REG);
6783 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6785 unsigned int regno = AX_REG;
6789 switch (GET_MODE_SIZE (mode))
6792      if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6793 && !COMPLEX_MODE_P (mode))
6794 regno = FIRST_SSE_REG;
6798 if (mode == SFmode || mode == DFmode)
6799 regno = FIRST_SSE_REG;
6805 return gen_rtx_REG (orig_mode, regno);
6809 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6810 enum machine_mode orig_mode, enum machine_mode mode)
6812 const_tree fn, fntype;
6815 if (fntype_or_decl && DECL_P (fntype_or_decl))
6816 fn = fntype_or_decl;
6817 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6819 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6820 return function_value_ms_64 (orig_mode, mode);
6821 else if (TARGET_64BIT)
6822 return function_value_64 (orig_mode, mode, valtype);
6824 return function_value_32 (orig_mode, mode, fntype, fn);
6828 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6829 bool outgoing ATTRIBUTE_UNUSED)
6831 enum machine_mode mode, orig_mode;
6833 orig_mode = TYPE_MODE (valtype);
6834 mode = type_natural_mode (valtype, NULL);
6835 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6839 ix86_libcall_value (enum machine_mode mode)
6841 return ix86_function_value_1 (NULL, NULL, mode, mode);
6844 /* Return true iff type is returned in memory. */
6846 static bool ATTRIBUTE_UNUSED
6847 return_in_memory_32 (const_tree type, enum machine_mode mode)
6851 if (mode == BLKmode)
6854 size = int_size_in_bytes (type);
6856 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6859 if (VECTOR_MODE_P (mode) || mode == TImode)
6861 /* User-created vectors small enough to fit in EAX. */
6865      /* MMX/3dNow values are returned in MM0,
6866         except when it doesn't exist or the ABI prescribes otherwise.  */
6868 return !TARGET_MMX || TARGET_VECT8_RETURNS;
6870 /* SSE values are returned in XMM0, except when it doesn't exist. */
6874 /* AVX values are returned in YMM0, except when it doesn't exist. */
6885 /* OImode shouldn't be used directly. */
6886 gcc_assert (mode != OImode);
6891 static bool ATTRIBUTE_UNUSED
6892 return_in_memory_64 (const_tree type, enum machine_mode mode)
6894 int needed_intregs, needed_sseregs;
6895 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6898 static bool ATTRIBUTE_UNUSED
6899 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6901 HOST_WIDE_INT size = int_size_in_bytes (type);
6903 /* __m128 is returned in xmm0. */
6904 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6905 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6908  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6909 return size != 1 && size != 2 && size != 4 && size != 8;
6913 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6915 #ifdef SUBTARGET_RETURN_IN_MEMORY
6916 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6918 const enum machine_mode mode = type_natural_mode (type, NULL);
6922 if (ix86_function_type_abi (fntype) == MS_ABI)
6923 return return_in_memory_ms_64 (type, mode);
6925 return return_in_memory_64 (type, mode);
6928 return return_in_memory_32 (type, mode);
6932 /* When returning SSE vector types, we have a choice of either
6933 (1) being abi incompatible with a -march switch, or
6934 (2) generating an error.
6935 Given no good solution, I think the safest thing is one warning.
6936 The user won't be able to use -Werror, but....
6938 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6939 called in response to actually generating a caller or callee that
6940 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6941 via aggregate_value_p for general type probing from tree-ssa. */
6944 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6946 static bool warnedsse, warnedmmx;
6948 if (!TARGET_64BIT && type)
6950 /* Look at the return type of the function, not the function type. */
6951 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6953 if (!TARGET_SSE && !warnedsse)
6956 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6959 warning (0, "SSE vector return without SSE enabled "
6964 if (!TARGET_MMX && !warnedmmx)
6966 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6969 warning (0, "MMX vector return without MMX enabled "
6979 /* Create the va_list data type. */
6981 /* Returns the calling convention specific va_list data type.
6982    The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6985 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6987 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6989  /* For i386 we use a plain pointer to the argument area.  */
6990 if (!TARGET_64BIT || abi == MS_ABI)
6991 return build_pointer_type (char_type_node);
6993 record = lang_hooks.types.make_type (RECORD_TYPE);
6994 type_decl = build_decl (BUILTINS_LOCATION,
6995 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6997 f_gpr = build_decl (BUILTINS_LOCATION,
6998 FIELD_DECL, get_identifier ("gp_offset"),
6999 unsigned_type_node);
7000 f_fpr = build_decl (BUILTINS_LOCATION,
7001 FIELD_DECL, get_identifier ("fp_offset"),
7002 unsigned_type_node);
7003 f_ovf = build_decl (BUILTINS_LOCATION,
7004 FIELD_DECL, get_identifier ("overflow_arg_area"),
7006 f_sav = build_decl (BUILTINS_LOCATION,
7007 FIELD_DECL, get_identifier ("reg_save_area"),
7010 va_list_gpr_counter_field = f_gpr;
7011 va_list_fpr_counter_field = f_fpr;
7013 DECL_FIELD_CONTEXT (f_gpr) = record;
7014 DECL_FIELD_CONTEXT (f_fpr) = record;
7015 DECL_FIELD_CONTEXT (f_ovf) = record;
7016 DECL_FIELD_CONTEXT (f_sav) = record;
7018 TREE_CHAIN (record) = type_decl;
7019 TYPE_NAME (record) = type_decl;
7020 TYPE_FIELDS (record) = f_gpr;
7021 DECL_CHAIN (f_gpr) = f_fpr;
7022 DECL_CHAIN (f_fpr) = f_ovf;
7023 DECL_CHAIN (f_ovf) = f_sav;
7025 layout_type (record);
7027 /* The correct type is an array type of one element. */
7028 return build_array_type (record, build_index_type (size_zero_node));
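/* For reference (editorial, not in the original source), the record built
   above corresponds to the SysV psABI declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   where the trailing one-element array gives va_list its by-reference
   argument passing behavior.  */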
7031 /* Set up the builtin va_list data type and, for 64-bit, the additional
7032    calling convention specific va_list data types.  */
7035 ix86_build_builtin_va_list (void)
7037 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7039  /* Initialize the ABI-specific va_list builtin types.  */
7043 if (ix86_abi == MS_ABI)
7045 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7046 if (TREE_CODE (t) != RECORD_TYPE)
7047 t = build_variant_type_copy (t);
7048 sysv_va_list_type_node = t;
7053 if (TREE_CODE (t) != RECORD_TYPE)
7054 t = build_variant_type_copy (t);
7055 sysv_va_list_type_node = t;
7057 if (ix86_abi != MS_ABI)
7059 t = ix86_build_builtin_va_list_abi (MS_ABI);
7060 if (TREE_CODE (t) != RECORD_TYPE)
7061 t = build_variant_type_copy (t);
7062 ms_va_list_type_node = t;
7067 if (TREE_CODE (t) != RECORD_TYPE)
7068 t = build_variant_type_copy (t);
7069 ms_va_list_type_node = t;
7076 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7079 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7085 /* GPR size of varargs save area. */
7086 if (cfun->va_list_gpr_size)
7087 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7089 ix86_varargs_gpr_size = 0;
7091 /* FPR size of varargs save area. We don't need it if we don't pass
7092 anything in SSE registers. */
7093 if (TARGET_SSE && cfun->va_list_fpr_size)
7094 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7096 ix86_varargs_fpr_size = 0;
7098 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7101 save_area = frame_pointer_rtx;
7102 set = get_varargs_alias_set ();
7104 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7105 if (max > X86_64_REGPARM_MAX)
7106 max = X86_64_REGPARM_MAX;
7108 for (i = cum->regno; i < max; i++)
7110 mem = gen_rtx_MEM (Pmode,
7111 plus_constant (save_area, i * UNITS_PER_WORD));
7112 MEM_NOTRAP_P (mem) = 1;
7113 set_mem_alias_set (mem, set);
7114 emit_move_insn (mem, gen_rtx_REG (Pmode,
7115 x86_64_int_parameter_registers[i]));
7118 if (ix86_varargs_fpr_size)
7120 enum machine_mode smode;
7123    /* Now emit code to save SSE registers.  The AX parameter contains the
7124       number of SSE parameter registers used to call this function, though
7125       all we actually check here is the zero/non-zero status.  */
7127 label = gen_label_rtx ();
7128 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7129 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7132 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7133 we used movdqa (i.e. TImode) instead? Perhaps even better would
7134 be if we could determine the real mode of the data, via a hook
7135 into pass_stdarg. Ignore all that for now. */
7137 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7138 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7140 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7141 if (max > X86_64_SSE_REGPARM_MAX)
7142 max = X86_64_SSE_REGPARM_MAX;
7144 for (i = cum->sse_regno; i < max; ++i)
7146 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7147 mem = gen_rtx_MEM (smode, mem);
7148 MEM_NOTRAP_P (mem) = 1;
7149 set_mem_alias_set (mem, set);
7150 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7152 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7160 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7162 alias_set_type set = get_varargs_alias_set ();
7165 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7169 mem = gen_rtx_MEM (Pmode,
7170 plus_constant (virtual_incoming_args_rtx,
7171 i * UNITS_PER_WORD));
7172 MEM_NOTRAP_P (mem) = 1;
7173 set_mem_alias_set (mem, set);
7175 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7176 emit_move_insn (mem, reg);
7181 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7182 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7185 CUMULATIVE_ARGS next_cum;
7188 /* This argument doesn't appear to be used anymore. Which is good,
7189 because the old code here didn't suppress rtl generation. */
7190 gcc_assert (!no_rtl);
7195 fntype = TREE_TYPE (current_function_decl);
7197 /* For varargs, we do not want to skip the dummy va_dcl argument.
7198 For stdargs, we do want to skip the last named argument. */
7200 if (stdarg_p (fntype))
7201 ix86_function_arg_advance (&next_cum, mode, type, true);
7203 if (cum->call_abi == MS_ABI)
7204 setup_incoming_varargs_ms_64 (&next_cum);
7206 setup_incoming_varargs_64 (&next_cum);
7209 /* Check whether TYPE is the plain char * kind of va_list.  */
7212 is_va_list_char_pointer (tree type)
7216 /* For 32-bit it is always true. */
7219 canonic = ix86_canonical_va_list_type (type);
7220 return (canonic == ms_va_list_type_node
7221 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7224 /* Implement va_start. */
7227 ix86_va_start (tree valist, rtx nextarg)
7229 HOST_WIDE_INT words, n_gpr, n_fpr;
7230 tree f_gpr, f_fpr, f_ovf, f_sav;
7231 tree gpr, fpr, ovf, sav, t;
7236 if (flag_split_stack
7237 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7239 unsigned int scratch_regno;
7241 /* When we are splitting the stack, we can't refer to the stack
7242 arguments using internal_arg_pointer, because they may be on
7243 the old stack. The split stack prologue will arrange to
7244 leave a pointer to the old stack arguments in a scratch
7245 register, which we here copy to a pseudo-register. The split
7246 stack prologue can't set the pseudo-register directly because
7247 it (the prologue) runs before any registers have been saved. */
7249 scratch_regno = split_stack_prologue_scratch_regno ();
7250 if (scratch_regno != INVALID_REGNUM)
7254 reg = gen_reg_rtx (Pmode);
7255 cfun->machine->split_stack_varargs_pointer = reg;
7258 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
7262 push_topmost_sequence ();
7263 emit_insn_after (seq, entry_of_function ());
7264 pop_topmost_sequence ();
7268  /* Only the 64-bit target needs something special.  */
7269 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7271 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7272 std_expand_builtin_va_start (valist, nextarg);
7277 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
7278 next = expand_binop (ptr_mode, add_optab,
7279 cfun->machine->split_stack_varargs_pointer,
7280 crtl->args.arg_offset_rtx,
7281 NULL_RTX, 0, OPTAB_LIB_WIDEN);
7282 convert_move (va_r, next, 0);
7287 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7288 f_fpr = DECL_CHAIN (f_gpr);
7289 f_ovf = DECL_CHAIN (f_fpr);
7290 f_sav = DECL_CHAIN (f_ovf);
7292 valist = build_simple_mem_ref (valist);
7293 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7294 /* The following should be folded into the MEM_REF offset. */
7295 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7297 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7299 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7301 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7304  /* Count the number of gp and fp argument registers used.  */
7305 words = crtl->args.info.words;
7306 n_gpr = crtl->args.info.regno;
7307 n_fpr = crtl->args.info.sse_regno;
7309 if (cfun->va_list_gpr_size)
7311 type = TREE_TYPE (gpr);
7312 t = build2 (MODIFY_EXPR, type,
7313 gpr, build_int_cst (type, n_gpr * 8));
7314 TREE_SIDE_EFFECTS (t) = 1;
7315 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7318 if (TARGET_SSE && cfun->va_list_fpr_size)
7320 type = TREE_TYPE (fpr);
7321 t = build2 (MODIFY_EXPR, type, fpr,
7322 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7323 TREE_SIDE_EFFECTS (t) = 1;
7324 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
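/* Editorial example of the initialization above: for

     void f (int a, double b, ...);

   the named arguments consume one GPR and one SSE register, so va_start
   stores gp_offset = 1 * 8 = 8 and, with X86_64_REGPARM_MAX == 6,
   fp_offset = 1 * 16 + 8 * 6 = 64.  */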
7327 /* Find the overflow area. */
7328 type = TREE_TYPE (ovf);
7329 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
7330 ovf_rtx = crtl->args.internal_arg_pointer;
7332 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
7333 t = make_tree (type, ovf_rtx);
7335 t = build2 (POINTER_PLUS_EXPR, type, t,
7336 size_int (words * UNITS_PER_WORD));
7337 t = build2 (MODIFY_EXPR, type, ovf, t);
7338 TREE_SIDE_EFFECTS (t) = 1;
7339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7341 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7343      /* Find the register save area.
7344         The function prologue saves it right above the stack frame.  */
7345 type = TREE_TYPE (sav);
7346 t = make_tree (type, frame_pointer_rtx);
7347 if (!ix86_varargs_gpr_size)
7348 t = build2 (POINTER_PLUS_EXPR, type, t,
7349 size_int (-8 * X86_64_REGPARM_MAX));
7350 t = build2 (MODIFY_EXPR, type, sav, t);
7351 TREE_SIDE_EFFECTS (t) = 1;
7352 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7356 /* Implement va_arg. */
7359 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7362 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7363 tree f_gpr, f_fpr, f_ovf, f_sav;
7364 tree gpr, fpr, ovf, sav, t;
7366 tree lab_false, lab_over = NULL_TREE;
7371 enum machine_mode nat_mode;
7372 unsigned int arg_boundary;
7374  /* Only the 64-bit target needs something special.  */
7375 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7376 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7378 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7379 f_fpr = DECL_CHAIN (f_gpr);
7380 f_ovf = DECL_CHAIN (f_fpr);
7381 f_sav = DECL_CHAIN (f_ovf);
7383 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7384 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7385 valist = build_va_arg_indirect_ref (valist);
7386 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7387 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7388 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7390 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7392 type = build_pointer_type (type);
7393 size = int_size_in_bytes (type);
7394 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7396 nat_mode = type_natural_mode (type, NULL);
7405      /* Unnamed 256bit vector mode parameters are passed on the stack.  */
7406 if (ix86_cfun_abi () == SYSV_ABI)
7413 container = construct_container (nat_mode, TYPE_MODE (type),
7414 type, 0, X86_64_REGPARM_MAX,
7415 X86_64_SSE_REGPARM_MAX, intreg,
7420 /* Pull the value out of the saved registers. */
7422 addr = create_tmp_var (ptr_type_node, "addr");
7426 int needed_intregs, needed_sseregs;
7428 tree int_addr, sse_addr;
7430 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7431 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7433 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7435 need_temp = (!REG_P (container)
7436 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7437 || TYPE_ALIGN (type) > 128));
7439      /* If we are passing a structure, verify that it is a consecutive block
7440         in the register save area.  If not, we need to do moves.  */
7441 if (!need_temp && !REG_P (container))
7443	  /* Verify that all registers are strictly consecutive.  */
7444 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7448 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7450 rtx slot = XVECEXP (container, 0, i);
7451 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7452 || INTVAL (XEXP (slot, 1)) != i * 16)
7460 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7462 rtx slot = XVECEXP (container, 0, i);
7463 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7464 || INTVAL (XEXP (slot, 1)) != i * 8)
7476 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7477 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7480 /* First ensure that we fit completely in registers. */
7483 t = build_int_cst (TREE_TYPE (gpr),
7484 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7485 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7486 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7487 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7488 gimplify_and_add (t, pre_p);
7492 t = build_int_cst (TREE_TYPE (fpr),
7493 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7494 + X86_64_REGPARM_MAX * 8);
7495 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7496 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7497 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7498 gimplify_and_add (t, pre_p);
7501      /* Compute the index to the start of the area used for integer regs.  */
7504 /* int_addr = gpr + sav; */
7505 t = fold_convert (sizetype, gpr);
7506 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7507 gimplify_assign (int_addr, t, pre_p);
7511 /* sse_addr = fpr + sav; */
7512 t = fold_convert (sizetype, fpr);
7513 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7514 gimplify_assign (sse_addr, t, pre_p);
7518 int i, prev_size = 0;
7519 tree temp = create_tmp_var (type, "va_arg_tmp");
7522 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7523 gimplify_assign (addr, t, pre_p);
7525 for (i = 0; i < XVECLEN (container, 0); i++)
7527 rtx slot = XVECEXP (container, 0, i);
7528 rtx reg = XEXP (slot, 0);
7529 enum machine_mode mode = GET_MODE (reg);
7535 tree dest_addr, dest;
7536 int cur_size = GET_MODE_SIZE (mode);
7538 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
7539 prev_size = INTVAL (XEXP (slot, 1));
7540 if (prev_size + cur_size > size)
7542 cur_size = size - prev_size;
7543 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7544 if (mode == BLKmode)
7547 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7548 if (mode == GET_MODE (reg))
7549 addr_type = build_pointer_type (piece_type);
7551 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7553 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7556 if (SSE_REGNO_P (REGNO (reg)))
7558 src_addr = sse_addr;
7559 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7563 src_addr = int_addr;
7564 src_offset = REGNO (reg) * 8;
7566 src_addr = fold_convert (addr_type, src_addr);
7567 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7568 size_int (src_offset));
7570 dest_addr = fold_convert (daddr_type, addr);
7571 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7572 size_int (prev_size));
7573 if (cur_size == GET_MODE_SIZE (mode))
7575 src = build_va_arg_indirect_ref (src_addr);
7576 dest = build_va_arg_indirect_ref (dest_addr);
7578 gimplify_assign (dest, src, pre_p);
7583 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7584 3, dest_addr, src_addr,
7585 size_int (cur_size));
7586 gimplify_and_add (copy, pre_p);
7588 prev_size += cur_size;
7594 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7595 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7596 gimplify_assign (gpr, t, pre_p);
7601 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7602 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7603 gimplify_assign (fpr, t, pre_p);
7606 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7608 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7611 /* ... otherwise out of the overflow area. */
7613  /* When the caller aligns a parameter on the stack, alignment beyond
7614     MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7615     MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee with the
7616     caller here.  */
7617 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7618 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7619 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7621 /* Care for on-stack alignment if needed. */
7622 if (arg_boundary <= 64 || size == 0)
7626 HOST_WIDE_INT align = arg_boundary / 8;
7627 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7628 size_int (align - 1));
7629 t = fold_convert (sizetype, t);
7630 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7632 t = fold_convert (TREE_TYPE (ovf), t);
7635 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7636 gimplify_assign (addr, t, pre_p);
7638 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7639 size_int (rsize * UNITS_PER_WORD));
7640 gimplify_assign (unshare_expr (ovf), t, pre_p);
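/* Editorial example of the alignment code above, assuming the usual
   (ovf + align-1) & -align idiom: for a 16-byte aligned type, align == 16,
   so the overflow pointer is rounded up as

     addr = (ovf + 15) & -16;

   and ovf is then advanced past the fetched argument by rsize words.  */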
7643 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7645 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7646 addr = fold_convert (ptrtype, addr);
7649 addr = build_va_arg_indirect_ref (addr);
7650 return build_va_arg_indirect_ref (addr);
7653 /* Return true if OPNUM's MEM should be matched
7654 in movabs* patterns. */
7657 ix86_check_movabs (rtx insn, int opnum)
7661 set = PATTERN (insn);
7662 if (GET_CODE (set) == PARALLEL)
7663 set = XVECEXP (set, 0, 0);
7664 gcc_assert (GET_CODE (set) == SET);
7665 mem = XEXP (set, opnum);
7666 while (GET_CODE (mem) == SUBREG)
7667 mem = SUBREG_REG (mem);
7668 gcc_assert (MEM_P (mem));
7669 return volatile_ok || !MEM_VOLATILE_P (mem);
7672 /* Initialize the table of extra 80387 mathematical constants. */
7675 init_ext_80387_constants (void)
7677 static const char * cst[5] =
7679 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7680 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7681 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7682 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7683 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7687 for (i = 0; i < 5; i++)
7689 real_from_string (&ext_80387_constants_table[i], cst[i]);
7690 /* Ensure each constant is rounded to XFmode precision. */
7691 real_convert (&ext_80387_constants_table[i],
7692 XFmode, &ext_80387_constants_table[i]);
7695 ext_80387_constants_init = 1;
7698 /* Return non-zero if the constant is something that
7699 can be loaded with a special instruction. */
7702 standard_80387_constant_p (rtx x)
7704 enum machine_mode mode = GET_MODE (x);
7708 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7711 if (x == CONST0_RTX (mode))
7713 if (x == CONST1_RTX (mode))
7716 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7718 /* For XFmode constants, try to find a special 80387 instruction when
7719 optimizing for size or on those CPUs that benefit from them. */
7721 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7725 if (! ext_80387_constants_init)
7726 init_ext_80387_constants ();
7728 for (i = 0; i < 5; i++)
7729 if (real_identical (&r, &ext_80387_constants_table[i]))
7733  /* A load of the constant -0.0 or -1.0 will be split into an
7734     fldz;fchs or fld1;fchs sequence.  */
7735 if (real_isnegzero (&r))
7737 if (real_identical (&r, &dconstm1))
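/* Editorial summary, inferred from the code above and the opcode table
   that follows: standard_80387_constant_p returns 1 for +0.0 (fldz),
   2 for +1.0 (fld1), 3..7 for the five table constants (fldlg2, fldln2,
   fldl2e, fldl2t, fldpi), and 8 / 9 for -0.0 / -1.0, which are later
   split into fldz;fchs and fld1;fchs.  */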
7743 /* Return the opcode of the special instruction to be used to load
7747 standard_80387_constant_opcode (rtx x)
7749 switch (standard_80387_constant_p (x))
7773 /* Return the CONST_DOUBLE representing the 80387 constant that is
7774 loaded by the specified special instruction. The argument IDX
7775 matches the return value from standard_80387_constant_p. */
7778 standard_80387_constant_rtx (int idx)
7782 if (! ext_80387_constants_init)
7783 init_ext_80387_constants ();
7799 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7803 /* Return 1 if X is all 0s and 2 if X is all 1s
7804    in a supported SSE vector mode.  */
7807 standard_sse_constant_p (rtx x)
7809 enum machine_mode mode = GET_MODE (x);
7811 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7813 if (vector_all_ones_operand (x, mode))
7829 /* Return the opcode of the special instruction to be used to load
7833 standard_sse_constant_opcode (rtx insn, rtx x)
7835 switch (standard_sse_constant_p (x))
7838 switch (get_attr_mode (insn))
7841 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7843 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7844 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7846 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7848 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7849 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7851 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7853 return "vxorps\t%x0, %x0, %x0";
7855 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7856 return "vxorps\t%x0, %x0, %x0";
7858 return "vxorpd\t%x0, %x0, %x0";
7860 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7861 return "vxorps\t%x0, %x0, %x0";
7863 return "vpxor\t%x0, %x0, %x0";
7868 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7875 /* Returns true if OP contains a symbol reference.  */
7878 symbolic_reference_mentioned_p (rtx op)
7883 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7886 fmt = GET_RTX_FORMAT (GET_CODE (op));
7887 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7893 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7894 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7898 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7905 /* Return true if it is appropriate to emit `ret' instructions in the
7906 body of a function. Do this only if the epilogue is simple, needing a
7907 couple of insns. Prior to reloading, we can't tell how many registers
7908 must be saved, so return false then. Return false if there is no frame
7909 marker to de-allocate. */
7912 ix86_can_use_return_insn_p (void)
7914 struct ix86_frame frame;
7916 if (! reload_completed || frame_pointer_needed)
7919 /* Don't allow more than 32k pop, since that's all we can do
7920 with one instruction. */
7921 if (crtl->args.pops_args && crtl->args.size >= 32768)
7924 ix86_compute_frame_layout (&frame);
7925 return (frame.stack_pointer_offset == UNITS_PER_WORD
7926 && (frame.nregs + frame.nsseregs) == 0);
7929 /* Value should be nonzero if functions must have frame pointers.
7930 Zero means the frame pointer need not be set up (and parms may
7931 be accessed via the stack pointer) in functions that seem suitable. */
7934 ix86_frame_pointer_required (void)
7936 /* If we accessed previous frames, then the generated code expects
7937 to be able to access the saved ebp value in our frame. */
7938 if (cfun->machine->accesses_prev_frame)
7941  /* Several x86 OSes need a frame pointer for other reasons,
7942     usually pertaining to setjmp.  */
7943 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7946 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
7947 turns off the frame pointer by default. Turn it back on now if
7948 we've not got a leaf function. */
7949 if (TARGET_OMIT_LEAF_FRAME_POINTER
7950 && (!current_function_is_leaf
7951 || ix86_current_function_calls_tls_descriptor))
7954 if (crtl->profile && !flag_fentry)
7960 /* Record that the current function accesses previous call frames. */
7963 ix86_setup_frame_addresses (void)
7965 cfun->machine->accesses_prev_frame = 1;
7968 #ifndef USE_HIDDEN_LINKONCE
7969 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7970 # define USE_HIDDEN_LINKONCE 1
7972 # define USE_HIDDEN_LINKONCE 0
7976 static int pic_labels_used;
7978 /* Fills in the label name that should be used for a pc thunk for
7979 the given register. */
7982 get_pc_thunk_name (char name[32], unsigned int regno)
7984 gcc_assert (!TARGET_64BIT);
7986 if (USE_HIDDEN_LINKONCE)
7987 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7989 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7993 /* This function generates, for -fpic, thunks that load the chosen PIC
7994    register with the return address of the caller and then return.  */
7997 ix86_code_end (void)
8002 for (regno = AX_REG; regno <= SP_REG; regno++)
8007 if (!(pic_labels_used & (1 << regno)))
8010 get_pc_thunk_name (name, regno);
8012 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
8013 get_identifier (name),
8014 build_function_type (void_type_node, void_list_node));
8015 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
8016 NULL_TREE, void_type_node);
8017 TREE_PUBLIC (decl) = 1;
8018 TREE_STATIC (decl) = 1;
8023 switch_to_section (darwin_sections[text_coal_section]);
8024 fputs ("\t.weak_definition\t", asm_out_file);
8025 assemble_name (asm_out_file, name);
8026 fputs ("\n\t.private_extern\t", asm_out_file);
8027 assemble_name (asm_out_file, name);
8028 putc ('\n', asm_out_file);
8029 ASM_OUTPUT_LABEL (asm_out_file, name);
8030 DECL_WEAK (decl) = 1;
8034 if (USE_HIDDEN_LINKONCE)
8036 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
8038 targetm.asm_out.unique_section (decl, 0);
8039 switch_to_section (get_named_section (decl, NULL, 0));
8041 targetm.asm_out.globalize_label (asm_out_file, name);
8042 fputs ("\t.hidden\t", asm_out_file);
8043 assemble_name (asm_out_file, name);
8044 putc ('\n', asm_out_file);
8045 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8049 switch_to_section (text_section);
8050 ASM_OUTPUT_LABEL (asm_out_file, name);
8053 DECL_INITIAL (decl) = make_node (BLOCK);
8054 current_function_decl = decl;
8055 init_function_start (decl);
8056 first_function_block_is_cold = false;
8057 /* Make sure unwind info is emitted for the thunk if needed. */
8058 final_start_function (emit_barrier (), asm_out_file, 1);
8060 /* Pad stack IP move with 4 instructions (two NOPs count
8061 as one instruction). */
8062 if (TARGET_PAD_SHORT_FUNCTION)
8067 fputs ("\tnop\n", asm_out_file);
8070 xops[0] = gen_rtx_REG (Pmode, regno);
8071 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8072 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8073 fputs ("\tret\n", asm_out_file);
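/* Editorial illustration: for regno == BX_REG the thunk emitted above
   assembles to roughly

     __i686.get_pc_thunk.bx:
             movl    (%esp), %ebx
             ret

   copying the return address, i.e. the address of the instruction
   following the call, into the PIC register.  */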
8074 final_end_function ();
8075 init_insn_lengths ();
8076 free_after_compilation (cfun);
8078 current_function_decl = NULL;
8081 if (flag_split_stack)
8082 file_end_indicate_split_stack ();
8085 /* Emit code for the SET_GOT patterns. */
8088 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8094 if (TARGET_VXWORKS_RTP && flag_pic)
8096 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8097 xops[2] = gen_rtx_MEM (Pmode,
8098 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8099 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8101 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8102 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8103 an unadorned address. */
8104 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8105 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8106 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8110 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8112 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8114 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8117 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8120 output_asm_insn ("call\t%a2", xops);
8121 #ifdef DWARF2_UNWIND_INFO
8122 /* The call to the next label acts as a push. */
8123 if (dwarf2out_do_frame ())
8127 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8128 gen_rtx_PLUS (Pmode,
8131 RTX_FRAME_RELATED_P (insn) = 1;
8132 dwarf2out_frame_debug (insn, true);
8139 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8140 is what will be referenced by the Mach-O PIC subsystem. */
8142 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8145 targetm.asm_out.internal_label (asm_out_file, "L",
8146 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8150 output_asm_insn ("pop%z0\t%0", xops);
8151 #ifdef DWARF2_UNWIND_INFO
8152 /* The pop is a pop and clobbers dest, but doesn't restore it
8153 for unwind info purposes. */
8154 if (dwarf2out_do_frame ())
8158 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8159 dwarf2out_frame_debug (insn, true);
8160 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8161 gen_rtx_PLUS (Pmode,
8164 RTX_FRAME_RELATED_P (insn) = 1;
8165 dwarf2out_frame_debug (insn, true);
8174 get_pc_thunk_name (name, REGNO (dest));
8175 pic_labels_used |= 1 << REGNO (dest);
8177 #ifdef DWARF2_UNWIND_INFO
8178 /* Ensure all queued register saves are flushed before the
8179 call. */
8180 if (dwarf2out_do_frame ())
8181 dwarf2out_flush_queued_reg_saves ();
8183 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8184 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8185 output_asm_insn ("call\t%X2", xops);
8186 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8187 is what will be referenced by the Mach-O PIC subsystem. */
8190 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8192 targetm.asm_out.internal_label (asm_out_file, "L",
8193 CODE_LABEL_NUMBER (label));
8200 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8201 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8203 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
8208 /* Generate a "push" pattern for input ARG. */
8213 struct machine_function *m = cfun->machine;
8215 if (m->fs.cfa_reg == stack_pointer_rtx)
8216 m->fs.cfa_offset += UNITS_PER_WORD;
8217 m->fs.sp_offset += UNITS_PER_WORD;
8219 return gen_rtx_SET (VOIDmode,
8221 gen_rtx_PRE_DEC (Pmode,
8222 stack_pointer_rtx)),
8226 /* Generate a "pop" pattern for input ARG. */
8231 return gen_rtx_SET (VOIDmode,
8234 gen_rtx_POST_INC (Pmode,
8235 stack_pointer_rtx)));
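/* Illustrative sketch of the RTL produced by the two helpers above, for
   the 32-bit case where Pmode is SImode:

     gen_push (reg):  (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg))
     gen_pop (reg):   (set (reg) (mem:SI (post_inc:SI (reg:SI sp))))

   i.e. a push stores through a pre-decremented stack pointer and a pop
   loads through a post-incremented one. */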
8238 /* Return >= 0 if there is an unused call-clobbered register available
8239 for the entire function. */
8242 ix86_select_alt_pic_regnum (void)
8244 if (current_function_is_leaf
8246 && !ix86_current_function_calls_tls_descriptor)
8249 /* Can't use the same register for both PIC and DRAP. */
8251 drap = REGNO (crtl->drap_reg);
8254 for (i = 2; i >= 0; --i)
8255 if (i != drap && !df_regs_ever_live_p (i))
8259 return INVALID_REGNUM;
8262 /* Return 1 if we need to save REGNO. */
8264 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8266 if (pic_offset_table_rtx
8267 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8268 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8270 || crtl->calls_eh_return
8271 || crtl->uses_const_pool))
8273 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8278 if (crtl->calls_eh_return && maybe_eh_return)
8283 unsigned test = EH_RETURN_DATA_REGNO (i);
8284 if (test == INVALID_REGNUM)
8291 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8294 return (df_regs_ever_live_p (regno)
8295 && !call_used_regs[regno]
8296 && !fixed_regs[regno]
8297 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
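/* For instance, in a 32-bit function that clobbers %ebx and needs a frame
   pointer, ix86_save_reg (BX_REG, ...) is true (%ebx is live, callee-saved
   and not fixed), while ix86_save_reg (HARD_FRAME_POINTER_REGNUM, ...) is
   false, since the %ebp save is handled separately by the prologue. */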
8300 /* Return the number of saved general purpose registers. */
8303 ix86_nsaved_regs (void)
8308 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8309 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8314 /* Return the number of saved SSE registers. */
8317 ix86_nsaved_sseregs (void)
8322 if (ix86_cfun_abi () != MS_ABI)
8324 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8325 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8330 /* Given FROM and TO register numbers, say whether this elimination is
8331 allowed. If stack alignment is needed, we can only replace argument
8332 pointer with hard frame pointer, or replace frame pointer with stack
8333 pointer. Otherwise, frame pointer elimination is automatically
8334 handled and all other eliminations are valid. */
8337 ix86_can_eliminate (const int from, const int to)
8339 if (stack_realign_fp)
8340 return ((from == ARG_POINTER_REGNUM
8341 && to == HARD_FRAME_POINTER_REGNUM)
8342 || (from == FRAME_POINTER_REGNUM
8343 && to == STACK_POINTER_REGNUM));
8345 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8348 /* Return the offset between two registers, one to be eliminated, and the other
8349 its replacement, at the start of a routine. */
8352 ix86_initial_elimination_offset (int from, int to)
8354 struct ix86_frame frame;
8355 ix86_compute_frame_layout (&frame);
8357 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8358 return frame.hard_frame_pointer_offset;
8359 else if (from == FRAME_POINTER_REGNUM
8360 && to == HARD_FRAME_POINTER_REGNUM)
8361 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8364 gcc_assert (to == STACK_POINTER_REGNUM);
8366 if (from == ARG_POINTER_REGNUM)
8367 return frame.stack_pointer_offset;
8369 gcc_assert (from == FRAME_POINTER_REGNUM);
8370 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8374 /* In a dynamically-aligned function, we can't know the offset from
8375 stack pointer to frame pointer, so we must ensure that setjmp
8376 eliminates fp against the hard fp (%ebp) rather than trying to
8377 index from %esp up to the top of the frame across a gap that is
8378 of unknown (at compile-time) size. */
8380 ix86_builtin_setjmp_frame_value (void)
8382 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8385 /* On the x86 -fsplit-stack and -fstack-protector both use the same
8386 field in the TCB, so they cannot be used together. */
8389 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED)
8393 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
8395 error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
8398 if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
8401 error ("%<-fsplit-stack%> requires "
8402 "assembler support for CFI directives");
8410 /* When using -fsplit-stack, the allocation routines set a field in
8411 the TCB to the bottom of the stack plus this much space, measured
8412 in bytes. */
8414 #define SPLIT_STACK_AVAILABLE 256
8416 /* Fill the ix86_frame structure describing the frame of the current function. */
8419 ix86_compute_frame_layout (struct ix86_frame *frame)
8421 unsigned int stack_alignment_needed;
8422 HOST_WIDE_INT offset;
8423 unsigned int preferred_alignment;
8424 HOST_WIDE_INT size = get_frame_size ();
8425 HOST_WIDE_INT to_allocate;
8427 frame->nregs = ix86_nsaved_regs ();
8428 frame->nsseregs = ix86_nsaved_sseregs ();
8430 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8431 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8433 /* The MS ABI seems to require stack alignment to always be 16, except for
8434 function prologues and leaf functions. */
8435 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8436 && (!current_function_is_leaf || cfun->calls_alloca != 0
8437 || ix86_current_function_calls_tls_descriptor))
8439 preferred_alignment = 16;
8440 stack_alignment_needed = 16;
8441 crtl->preferred_stack_boundary = 128;
8442 crtl->stack_alignment_needed = 128;
8445 gcc_assert (!size || stack_alignment_needed);
8446 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8447 gcc_assert (preferred_alignment <= stack_alignment_needed);
8449 /* During reload iterations the number of saved registers can change.
8450 Recompute the value as needed. Do not recompute when the number of registers
8451 didn't change, as reload makes multiple calls to this function and does not
8452 expect the decision to change within a single iteration. */
8453 if (!optimize_function_for_size_p (cfun)
8454 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8456 int count = frame->nregs;
8457 struct cgraph_node *node = cgraph_node (current_function_decl);
8459 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8460 /* The fast prologue uses moves instead of pushes to save registers. This
8461 is significantly longer, but also executes faster, as modern hardware
8462 can execute the moves in parallel but cannot do so for push/pop.
8464 Be careful about choosing which prologue to emit: when the function takes
8465 many instructions to execute, we may as well use the slow version, and
8466 likewise when the function is known to be outside a hot spot (which is
8467 known only with feedback). Weight the size of the function by the number
8468 of registers to save, as it is cheap to use one or two push instructions
8469 but very slow to use many of them. */
8471 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
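/* Worked example (hypothetical, assuming FAST_PROLOGUE_INSN_COUNT is 20):
   with three registers to save, COUNT becomes (3 - 1) * 20 == 40, and the
   fast prologue is chosen below only when expensive_function_p (40)
   returns false, i.e. the body is short relative to that weight. */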
8472 if (node->frequency < NODE_FREQUENCY_NORMAL
8473 || (flag_branch_probabilities
8474 && node->frequency < NODE_FREQUENCY_HOT))
8475 cfun->machine->use_fast_prologue_epilogue = false;
8477 cfun->machine->use_fast_prologue_epilogue
8478 = !expensive_function_p (count);
8480 if (TARGET_PROLOGUE_USING_MOVE
8481 && cfun->machine->use_fast_prologue_epilogue)
8482 frame->save_regs_using_mov = true;
8484 frame->save_regs_using_mov = false;
8486 /* If static stack checking is enabled and done with probes, the registers
8487 need to be saved before allocating the frame. */
8488 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8489 frame->save_regs_using_mov = false;
8491 /* Skip return address. */
8492 offset = UNITS_PER_WORD;
8494 /* Skip pushed static chain. */
8495 if (ix86_static_chain_on_stack)
8496 offset += UNITS_PER_WORD;
8498 /* Skip saved base pointer. */
8499 if (frame_pointer_needed)
8500 offset += UNITS_PER_WORD;
8502 frame->hard_frame_pointer_offset = offset;
8504 /* Register save area */
8505 offset += frame->nregs * UNITS_PER_WORD;
8506 frame->reg_save_offset = offset;
8508 /* Align and set SSE register save area. */
8509 if (frame->nsseregs)
8511 /* The only ABI that has saved SSE registers (Win64) also has a
8512 16-byte aligned default stack, and thus we don't need to be
8513 within the re-aligned local stack frame to save them. */
8514 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8515 offset = (offset + 16 - 1) & -16;
8516 offset += frame->nsseregs * 16;
8518 frame->sse_reg_save_offset = offset;
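/* Worked example (hypothetical numbers): if OFFSET is 44 on entry and
   nsseregs == 2, then (44 + 16 - 1) & -16 == 48 and the SSE save area
   ends at 48 + 2 * 16 == 80. The "& -16" mask works because -16 is
   ~15 in two's complement, clearing the low four bits after round-up. */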
8520 /* The re-aligned stack starts here. Values before this point are not
8521 directly comparable with values below this point. In order to make
8522 sure that no value happens to be the same before and after, force
8523 the alignment computation below to add a non-zero value. */
8524 if (stack_realign_fp)
8525 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8528 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8529 offset += frame->va_arg_size;
8531 /* Align start of frame for local function. */
8532 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8534 /* Frame pointer points here. */
8535 frame->frame_pointer_offset = offset;
8539 /* Add the outgoing arguments area. It can be skipped if we eliminated
8540 all the function calls as dead code.
8541 Skipping is however impossible when the function calls alloca, as the
8542 alloca expander assumes that the last crtl->outgoing_args_size bytes
8543 of the stack frame are unused. */
8544 if (ACCUMULATE_OUTGOING_ARGS
8545 && (!current_function_is_leaf || cfun->calls_alloca
8546 || ix86_current_function_calls_tls_descriptor))
8548 offset += crtl->outgoing_args_size;
8549 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8552 frame->outgoing_arguments_size = 0;
8554 /* Align stack boundary. Only needed if we're calling another function
8555 or using alloca. */
8556 if (!current_function_is_leaf || cfun->calls_alloca
8557 || ix86_current_function_calls_tls_descriptor)
8558 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8560 /* We've reached end of stack frame. */
8561 frame->stack_pointer_offset = offset;
8563 /* Size prologue needs to allocate. */
8564 to_allocate = offset - frame->sse_reg_save_offset;
8566 if ((!to_allocate && frame->nregs <= 1)
8567 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8568 frame->save_regs_using_mov = false;
8570 if (ix86_using_red_zone ()
8571 && current_function_sp_is_unchanging
8572 && current_function_is_leaf
8573 && !ix86_current_function_calls_tls_descriptor)
8575 frame->red_zone_size = to_allocate;
8576 if (frame->save_regs_using_mov)
8577 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8578 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8579 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8582 frame->red_zone_size = 0;
8583 frame->stack_pointer_offset -= frame->red_zone_size;
8586 /* This is semi-inlined memory_address_length, but simplified
8587 since we know that we're always dealing with reg+offset, and
8588 to avoid having to create and discard all that rtl. */
8591 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8597 /* EBP and R13 cannot be encoded without an offset. */
8598 len = (regno == BP_REG || regno == R13_REG);
8600 else if (IN_RANGE (offset, -128, 127))
8603 /* ESP and R12 must be encoded with a SIB byte. */
8604 if (regno == SP_REG || regno == R12_REG)
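/* Illustrative values for the rules above (bytes beyond the ModRM byte):
   (ebp, 0) -> 1 since %ebp always needs a displacement; (esp, 0) -> 1 for
   the SIB byte; (eax, 8) -> 1 for a disp8; (eax, 200) -> 4 for a disp32;
   (esp, 200) -> 5 for SIB plus disp32. */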
8610 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8611 The valid base registers are taken from CFUN->MACHINE->FS. */
8614 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8616 const struct machine_function *m = cfun->machine;
8617 rtx base_reg = NULL;
8618 HOST_WIDE_INT base_offset = 0;
8620 if (m->use_fast_prologue_epilogue)
8622 /* Choose the base register most likely to allow the most scheduling
8623 opportunities. Generally FP is valid throughout the function,
8624 while DRAP must be reloaded within the epilogue. But choose either
8625 over the SP due to increased encoding size. */
8629 base_reg = hard_frame_pointer_rtx;
8630 base_offset = m->fs.fp_offset - cfa_offset;
8632 else if (m->fs.drap_valid)
8634 base_reg = crtl->drap_reg;
8635 base_offset = 0 - cfa_offset;
8637 else if (m->fs.sp_valid)
8639 base_reg = stack_pointer_rtx;
8640 base_offset = m->fs.sp_offset - cfa_offset;
8645 HOST_WIDE_INT toffset;
8648 /* Choose the base register with the smallest address encoding.
8649 With a tie, choose FP > DRAP > SP. */
8652 base_reg = stack_pointer_rtx;
8653 base_offset = m->fs.sp_offset - cfa_offset;
8654 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8656 if (m->fs.drap_valid)
8658 toffset = 0 - cfa_offset;
8659 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8662 base_reg = crtl->drap_reg;
8663 base_offset = toffset;
8669 toffset = m->fs.fp_offset - cfa_offset;
8670 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8673 base_reg = hard_frame_pointer_rtx;
8674 base_offset = toffset;
8679 gcc_assert (base_reg != NULL);
8681 return plus_constant (base_reg, base_offset);
8684 /* Emit code to save registers in the prologue. */
8687 ix86_emit_save_regs (void)
8692 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8693 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8695 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8696 RTX_FRAME_RELATED_P (insn) = 1;
8700 /* Emit a single register save at CFA - CFA_OFFSET. */
8703 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8704 HOST_WIDE_INT cfa_offset)
8706 struct machine_function *m = cfun->machine;
8707 rtx reg = gen_rtx_REG (mode, regno);
8708 rtx mem, addr, base, insn;
8710 addr = choose_baseaddr (cfa_offset);
8711 mem = gen_frame_mem (mode, addr);
8713 /* For SSE saves, we need to indicate the 128-bit alignment. */
8714 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8716 insn = emit_move_insn (mem, reg);
8717 RTX_FRAME_RELATED_P (insn) = 1;
8720 if (GET_CODE (base) == PLUS)
8721 base = XEXP (base, 0);
8722 gcc_checking_assert (REG_P (base));
8724 /* When saving registers into a re-aligned local stack frame, avoid
8725 any tricky guessing by dwarf2out. */
8726 if (m->fs.realigned)
8728 gcc_checking_assert (stack_realign_drap);
8730 if (regno == REGNO (crtl->drap_reg))
8732 /* A bit of a hack. We force the DRAP register to be saved in
8733 the re-aligned stack frame, which provides us with a copy
8734 of the CFA that will last past the prologue. Install it. */
8735 gcc_checking_assert (cfun->machine->fs.fp_valid);
8736 addr = plus_constant (hard_frame_pointer_rtx,
8737 cfun->machine->fs.fp_offset - cfa_offset);
8738 mem = gen_rtx_MEM (mode, addr);
8739 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8743 /* The frame pointer is a stable reference within the
8744 aligned frame. Use it. */
8745 gcc_checking_assert (cfun->machine->fs.fp_valid);
8746 addr = plus_constant (hard_frame_pointer_rtx,
8747 cfun->machine->fs.fp_offset - cfa_offset);
8748 mem = gen_rtx_MEM (mode, addr);
8749 add_reg_note (insn, REG_CFA_EXPRESSION,
8750 gen_rtx_SET (VOIDmode, mem, reg));
8754 /* The memory may not be relative to the current CFA register,
8755 which means that we may need to generate a new pattern for
8756 use by the unwind info. */
8757 else if (base != m->fs.cfa_reg)
8759 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8760 mem = gen_rtx_MEM (mode, addr);
8761 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8765 /* Emit code to save registers using MOV insns.
8766 First register is stored at CFA - CFA_OFFSET. */
8768 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8772 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8773 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8775 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8776 cfa_offset -= UNITS_PER_WORD;
8780 /* Emit code to save SSE registers using MOV insns.
8781 First register is stored at CFA - CFA_OFFSET. */
8783 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8787 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8788 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8790 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8795 static GTY(()) rtx queued_cfa_restores;
8797 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8798 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8799 Don't add the note if the previously saved value will be left untouched
8800 within the stack red-zone till return, as unwinders can find the same value
8801 in the register and on the stack. */
8804 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8806 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8811 add_reg_note (insn, REG_CFA_RESTORE, reg);
8812 RTX_FRAME_RELATED_P (insn) = 1;
8816 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8819 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8822 ix86_add_queued_cfa_restore_notes (rtx insn)
8825 if (!queued_cfa_restores)
8827 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8829 XEXP (last, 1) = REG_NOTES (insn);
8830 REG_NOTES (insn) = queued_cfa_restores;
8831 queued_cfa_restores = NULL_RTX;
8832 RTX_FRAME_RELATED_P (insn) = 1;
8835 /* Expand prologue or epilogue stack adjustment.
8836 The pattern exists to put a dependency on all ebp-based memory accesses.
8837 STYLE should be negative if instructions should be marked as frame related,
8838 zero if the %r11 register is live and cannot be freely used, and positive
8839 otherwise. */
8842 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8843 int style, bool set_cfa)
8845 struct machine_function *m = cfun->machine;
8849 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
8850 else if (x86_64_immediate_operand (offset, DImode))
8851 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
8855 /* r11 is used by indirect sibcall return as well, set before the
8856 epilogue and used after the epilogue. */
8858 tmp = gen_rtx_REG (DImode, R11_REG);
8861 gcc_assert (src != hard_frame_pointer_rtx
8862 && dest != hard_frame_pointer_rtx);
8863 tmp = hard_frame_pointer_rtx;
8865 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8867 RTX_FRAME_RELATED_P (insn) = 1;
8869 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
8872 insn = emit_insn (insn);
8874 ix86_add_queued_cfa_restore_notes (insn);
8880 gcc_assert (m->fs.cfa_reg == src);
8881 m->fs.cfa_offset += INTVAL (offset);
8882 m->fs.cfa_reg = dest;
8884 r = gen_rtx_PLUS (Pmode, src, offset);
8885 r = gen_rtx_SET (VOIDmode, dest, r);
8886 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8887 RTX_FRAME_RELATED_P (insn) = 1;
8890 RTX_FRAME_RELATED_P (insn) = 1;
8892 if (dest == stack_pointer_rtx)
8894 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8895 bool valid = m->fs.sp_valid;
8897 if (src == hard_frame_pointer_rtx)
8899 valid = m->fs.fp_valid;
8900 ooffset = m->fs.fp_offset;
8902 else if (src == crtl->drap_reg)
8904 valid = m->fs.drap_valid;
8909 /* Else there are two possibilities: SP itself, which we set
8910 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
8911 taken care of by hand along the eh_return path. */
8912 gcc_checking_assert (src == stack_pointer_rtx
8913 || offset == const0_rtx);
8916 m->fs.sp_offset = ooffset - INTVAL (offset);
8917 m->fs.sp_valid = valid;
8921 /* Find an available register to be used as the dynamic realign argument
8922 pointer register. Such a register will be written in the prologue and
8923 used at the beginning of the body, so it must not be
8924 1. a parameter-passing register.
8925 2. the GOT pointer.
8926 We reuse the static-chain register if it is available. Otherwise, we
8927 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8928 slightly shorter encoding.
8930 Return: the regno of the chosen register. */
8933 find_drap_reg (void)
8935 tree decl = cfun->decl;
8939 /* Use R13 for nested functions or functions that need a static chain.
8940 Since a function with a tail call may use any caller-saved
8941 register in the epilogue, DRAP must not use a caller-saved
8942 register in that case. */
8943 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8950 /* Use DI for nested functions or functions that need a static chain.
8951 Since a function with a tail call may use any caller-saved
8952 register in the epilogue, DRAP must not use a caller-saved
8953 register in that case. */
8954 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8957 /* Reuse the static chain register if it isn't used for parameter
8958 passing. */
8959 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8960 && !lookup_attribute ("fastcall",
8961 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8962 && !lookup_attribute ("thiscall",
8963 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8970 /* Return minimum incoming stack alignment. */
8973 ix86_minimum_incoming_stack_boundary (bool sibcall)
8975 unsigned int incoming_stack_boundary;
8977 /* Prefer the one specified on the command line. */
8978 if (ix86_user_incoming_stack_boundary)
8979 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8980 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8981 if -mstackrealign is used, this isn't a sibcall check, and the
8982 estimated stack alignment is 128 bits. */
8985 && ix86_force_align_arg_pointer
8986 && crtl->stack_alignment_estimated == 128)
8987 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8989 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8991 /* Incoming stack alignment can be changed on individual functions
8992 via force_align_arg_pointer attribute. We use the smallest
8993 incoming stack boundary. */
8994 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8995 && lookup_attribute (ix86_force_align_arg_pointer_string,
8996 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8997 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8999 /* The incoming stack frame has to be aligned at least at
9000 parm_stack_boundary. */
9001 if (incoming_stack_boundary < crtl->parm_stack_boundary)
9002 incoming_stack_boundary = crtl->parm_stack_boundary;
9004 /* The stack at the entry of main is aligned by the runtime. We use
9005 the smallest incoming stack boundary. */
9006 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
9007 && DECL_NAME (current_function_decl)
9008 && MAIN_NAME_P (DECL_NAME (current_function_decl))
9009 && DECL_FILE_SCOPE_P (current_function_decl))
9010 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
9012 return incoming_stack_boundary;
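/* For example (illustrative): a function carrying the
   force_align_arg_pointer attribute is dropped to MIN_STACK_BOUNDARY,
   the boundary is raised to crtl->parm_stack_boundary when below it,
   and a file-scope "main" is capped at MAIN_STACK_BOUNDARY. */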
9015 /* Update incoming stack boundary and estimated stack alignment. */
9018 ix86_update_stack_boundary (void)
9020 ix86_incoming_stack_boundary
9021 = ix86_minimum_incoming_stack_boundary (false);
9023 /* x86_64 varargs need 16-byte stack alignment for the register save
9024 area. */
9027 && crtl->stack_alignment_estimated < 128)
9028 crtl->stack_alignment_estimated = 128;
9031 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
9032 needed or an rtx for DRAP otherwise. */
9035 ix86_get_drap_rtx (void)
9037 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
9038 crtl->need_drap = true;
9040 if (stack_realign_drap)
9042 /* Assign DRAP to vDRAP and return vDRAP. */
9043 unsigned int regno = find_drap_reg ();
9048 arg_ptr = gen_rtx_REG (Pmode, regno);
9049 crtl->drap_reg = arg_ptr;
9052 drap_vreg = copy_to_reg (arg_ptr);
9056 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
9059 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
9060 RTX_FRAME_RELATED_P (insn) = 1;
9068 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
9071 ix86_internal_arg_pointer (void)
9073 return virtual_incoming_args_rtx;
9076 struct scratch_reg {
9081 /* Return a short-lived scratch register for use on function entry.
9082 In 32-bit mode, it is valid only after the registers are saved
9083 in the prologue. This register must be released by means of
9084 release_scratch_register_on_entry once it is dead. */
9087 get_scratch_register_on_entry (struct scratch_reg *sr)
9095 /* We always use R11 in 64-bit mode. */
9100 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9102 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9103 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9104 int regparm = ix86_function_regparm (fntype, decl);
9106 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9108 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9109 for the static chain register. */
9110 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9111 && drap_regno != AX_REG)
9113 else if (regparm < 2 && drap_regno != DX_REG)
9115 /* ecx is the static chain register. */
9116 else if (regparm < 3 && !fastcall_p && !static_chain_p
9117 && drap_regno != CX_REG)
9119 else if (ix86_save_reg (BX_REG, true))
9121 /* esi is the static chain register. */
9122 else if (!(regparm == 3 && static_chain_p)
9123 && ix86_save_reg (SI_REG, true))
9125 else if (ix86_save_reg (DI_REG, true))
9129 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9134 sr->reg = gen_rtx_REG (Pmode, regno);
9137 rtx insn = emit_insn (gen_push (sr->reg));
9138 RTX_FRAME_RELATED_P (insn) = 1;
9142 /* Release a scratch register obtained from the preceding function. */
9145 release_scratch_register_on_entry (struct scratch_reg *sr)
9149 rtx x, insn = emit_insn (gen_pop (sr->reg));
9151 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9152 RTX_FRAME_RELATED_P (insn) = 1;
9153 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9154 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9155 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
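/* Typical use of the pair above, as in the stack probing routines below
   (sketch only):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... use sr.reg as a temporary ...
     release_scratch_register_on_entry (&sr);  */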
9159 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9161 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9164 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9166 /* We skip the probe for the first interval + a small dope of 4 words and
9167 probe that many bytes past the specified size to maintain a protection
9168 area at the bottom of the stack. */
9169 const int dope = 4 * UNITS_PER_WORD;
9170 rtx size_rtx = GEN_INT (size);
9172 /* See if we have a constant small number of probes to generate. If so,
9173 that's the easy case. The run-time loop is made up of 11 insns in the
9174 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9175 for n # of intervals. */
9176 if (size <= 5 * PROBE_INTERVAL)
9178 HOST_WIDE_INT i, adjust;
9179 bool first_probe = true;
9181 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9182 values of N from 1 until it exceeds SIZE. If only one probe is
9183 needed, this will not generate any code. Then adjust and probe
9184 to PROBE_INTERVAL + SIZE. */
9185 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9189 adjust = 2 * PROBE_INTERVAL + dope;
9190 first_probe = false;
9193 adjust = PROBE_INTERVAL;
9195 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9196 plus_constant (stack_pointer_rtx, -adjust)));
9197 emit_stack_probe (stack_pointer_rtx);
9201 adjust = size + PROBE_INTERVAL + dope;
9203 adjust = size + PROBE_INTERVAL - i;
9205 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9206 plus_constant (stack_pointer_rtx, -adjust)));
9207 emit_stack_probe (stack_pointer_rtx);
9209 /* Adjust back to account for the additional first interval. */
9210 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9211 plus_constant (stack_pointer_rtx,
9212 PROBE_INTERVAL + dope)));
9215 /* Otherwise, do the same as above, but in a loop. Note that we must be
9216 extra careful with variables wrapping around because we might be at
9217 the very top (or the very bottom) of the address space and we have
9218 to be able to handle this case properly; in particular, we use an
9219 equality test for the loop condition. */
9222 HOST_WIDE_INT rounded_size;
9223 struct scratch_reg sr;
9225 get_scratch_register_on_entry (&sr);
9228 /* Step 1: round SIZE to the previous multiple of the interval. */
9230 rounded_size = size & -PROBE_INTERVAL;
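/* Worked example (hypothetical numbers): with PROBE_INTERVAL == 4096 and
   SIZE == 10000, ROUNDED_SIZE == 8192, so the loop below performs two
   probes one interval apart and Step 4 covers the remaining
   10000 - 8192 == 1808 bytes. */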
9233 /* Step 2: compute initial and final value of the loop counter. */
9235 /* SP = SP_0 + PROBE_INTERVAL. */
9236 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9237 plus_constant (stack_pointer_rtx,
9238 - (PROBE_INTERVAL + dope))));
9240 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9241 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9242 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9243 gen_rtx_PLUS (Pmode, sr.reg,
9244 stack_pointer_rtx)));
9247 /* Step 3: the loop
9249 while (SP != LAST_ADDR)
9250 {
9251 SP = SP + PROBE_INTERVAL
9252 probe at SP
9253 }
9255 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9256 values of N from 1 until it is equal to ROUNDED_SIZE. */
9258 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9261 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9262 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9264 if (size != rounded_size)
9266 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9267 plus_constant (stack_pointer_rtx,
9268 rounded_size - size)));
9269 emit_stack_probe (stack_pointer_rtx);
9272 /* Adjust back to account for the additional first interval. */
9273 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9274 plus_constant (stack_pointer_rtx,
9275 PROBE_INTERVAL + dope)));
9277 release_scratch_register_on_entry (&sr);
9280 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9281 cfun->machine->fs.sp_offset += size;
9283 /* Make sure nothing is scheduled before we are done. */
9284 emit_insn (gen_blockage ());
9287 /* Adjust the stack pointer up to REG while probing it. */
9290 output_adjust_stack_and_probe (rtx reg)
9292 static int labelno = 0;
9293 char loop_lab[32], end_lab[32];
9296 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9297 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9299 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9301 /* Jump to END_LAB if SP == LAST_ADDR. */
9302 xops[0] = stack_pointer_rtx;
9304 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9305 fputs ("\tje\t", asm_out_file);
9306 assemble_name_raw (asm_out_file, end_lab);
9307 fputc ('\n', asm_out_file);
9309 /* SP = SP + PROBE_INTERVAL. */
9310 xops[1] = GEN_INT (PROBE_INTERVAL);
9311 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9313 /* Probe at SP. */
9314 xops[1] = const0_rtx;
9315 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9317 fprintf (asm_out_file, "\tjmp\t");
9318 assemble_name_raw (asm_out_file, loop_lab);
9319 fputc ('\n', asm_out_file);
9321 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9326 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9327 inclusive. These are offsets from the current stack pointer. */
9330 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9332 /* See if we have a constant small number of probes to generate. If so,
9333 that's the easy case. The run-time loop is made up of 7 insns in the
9334 generic case while the compile-time loop is made up of n insns for n #
9335 of intervals. */
9336 if (size <= 7 * PROBE_INTERVAL)
9340 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9341 it exceeds SIZE. If only one probe is needed, this will not
9342 generate any code. Then probe at FIRST + SIZE. */
9343 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9344 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9346 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
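/* E.g. (assuming PROBE_INTERVAL == 4096): FIRST == 4096 and SIZE == 8192
   emit probes at sp - 8192 and sp - 12288; the loop stops at i == 8192
   since that is not strictly below SIZE, and the final probe covers
   FIRST + SIZE. */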
9349 /* Otherwise, do the same as above, but in a loop. Note that we must be
9350 extra careful with variables wrapping around because we might be at
9351 the very top (or the very bottom) of the address space and we have
9352 to be able to handle this case properly; in particular, we use an
9353 equality test for the loop condition. */
9356 HOST_WIDE_INT rounded_size, last;
9357 struct scratch_reg sr;
9359 get_scratch_register_on_entry (&sr);
9362 /* Step 1: round SIZE to the previous multiple of the interval. */
9364 rounded_size = size & -PROBE_INTERVAL;
9367 /* Step 2: compute initial and final value of the loop counter. */
9369 /* TEST_OFFSET = FIRST. */
9370 emit_move_insn (sr.reg, GEN_INT (-first));
9372 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9373 last = first + rounded_size;
9376 /* Step 3: the loop
9378 while (TEST_ADDR != LAST_ADDR)
9379 {
9380 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9381 probe at TEST_ADDR
9382 }
9384 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9385 until it is equal to ROUNDED_SIZE. */
9387 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9390 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9391 that SIZE is equal to ROUNDED_SIZE. */
9393 if (size != rounded_size)
9394 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9397 rounded_size - size));
9399 release_scratch_register_on_entry (&sr);
9402 /* Make sure nothing is scheduled before we are done. */
9403 emit_insn (gen_blockage ());
9406 /* Probe a range of stack addresses from REG to END, inclusive. These are
9407 offsets from the current stack pointer. */
9410 output_probe_stack_range (rtx reg, rtx end)
9412 static int labelno = 0;
9413 char loop_lab[32], end_lab[32];
9416 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9417 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9419 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9421 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9424 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9425 fputs ("\tje\t", asm_out_file);
9426 assemble_name_raw (asm_out_file, end_lab);
9427 fputc ('\n', asm_out_file);
9429 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9430 xops[1] = GEN_INT (PROBE_INTERVAL);
9431 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9433 /* Probe at TEST_ADDR. */
9434 xops[0] = stack_pointer_rtx;
9436 xops[2] = const0_rtx;
9437 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9439 fprintf (asm_out_file, "\tjmp\t");
9440 assemble_name_raw (asm_out_file, loop_lab);
9441 fputc ('\n', asm_out_file);
9443 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9448 /* Finalize the stack_realign_needed flag, which guides generation of the
9449 prologue/epilogue in the correct form. */
9451 ix86_finalize_stack_realign_flags (void)
9453 /* Check if stack realignment is really needed after reload, and
9454 store the result in cfun. */
9455 unsigned int incoming_stack_boundary
9456 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9457 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9458 unsigned int stack_realign = (incoming_stack_boundary
9459 < (current_function_is_leaf
9460 ? crtl->max_used_stack_slot_alignment
9461 : crtl->stack_alignment_needed));
9463 if (crtl->stack_realign_finalized)
9465 /* After stack_realign_needed is finalized, we can no longer
9466 update it. */
9467 gcc_assert (crtl->stack_realign_needed == stack_realign);
9471 crtl->stack_realign_needed = stack_realign;
9472 crtl->stack_realign_finalized = true;
9476 /* Expand the prologue into a bunch of separate insns. */
9479 ix86_expand_prologue (void)
9481 struct machine_function *m = cfun->machine;
9484 struct ix86_frame frame;
9485 HOST_WIDE_INT allocate;
9486 bool int_registers_saved;
9488 ix86_finalize_stack_realign_flags ();
9490 /* DRAP should not coexist with stack_realign_fp */
9491 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9493 memset (&m->fs, 0, sizeof (m->fs));
9495 /* Initialize CFA state for before the prologue. */
9496 m->fs.cfa_reg = stack_pointer_rtx;
9497 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9499 /* Track SP offset to the CFA. We continue tracking this after we've
9500 swapped the CFA register away from SP. In the case of re-alignment
9501 this is fudged; we're interested in offsets within the local frame. */
9502 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9503 m->fs.sp_valid = true;
9505 ix86_compute_frame_layout (&frame);
9507 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9509 /* We should have already generated an error for any use of
9510 ms_hook on a nested function. */
9511 gcc_checking_assert (!ix86_static_chain_on_stack);
9513 /* Check if profiling is active and we shall use the
9514 profiling-before-prologue variant. If so, issue a sorry. */
9515 if (crtl->profile && flag_fentry != 0)
9516 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9518 /* In ix86_asm_output_function_label we emitted:
9519 8b ff movl.s %edi,%edi
9520 55 push %ebp
9521 8b ec movl.s %esp,%ebp
9523 This matches the hookable function prologue in Win32 API
9524 functions in Microsoft Windows XP Service Pack 2 and newer.
9525 Wine uses this to enable Windows apps to hook the Win32 API
9526 functions provided by Wine.
9528 What that means is that we've already set up the frame pointer. */
9530 if (frame_pointer_needed
9531 && !(crtl->drap_reg && crtl->stack_realign_needed))
9535 /* We've decided to use the frame pointer already set up.
9536 Describe this to the unwinder by pretending that both
9537 push and mov insns happen right here.
9539 Putting the unwind info here at the end of the ms_hook
9540 is done so that we can make absolutely certain we get
9541 the required byte sequence at the start of the function,
9542 rather than relying on an assembler that can produce
9543 the exact encoding required.
9545 However it does mean (in the unpatched case) that we have
9546 a 1 insn window where the asynchronous unwind info is
9547 incorrect. However, if we placed the unwind info at
9548 its correct location we would have incorrect unwind info
9549 in the patched case. Which is probably all moot since
9550 I don't expect Wine generates dwarf2 unwind info for the
9551 system libraries that use this feature. */
9553 insn = emit_insn (gen_blockage ());
9555 push = gen_push (hard_frame_pointer_rtx);
9556 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9558 RTX_FRAME_RELATED_P (push) = 1;
9559 RTX_FRAME_RELATED_P (mov) = 1;
9561 RTX_FRAME_RELATED_P (insn) = 1;
9562 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9563 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9565 /* Note that gen_push incremented m->fs.cfa_offset, even
9566 though we didn't emit the push insn here. */
9567 m->fs.cfa_reg = hard_frame_pointer_rtx;
9568 m->fs.fp_offset = m->fs.cfa_offset;
9569 m->fs.fp_valid = true;
9573 /* The frame pointer is not needed so pop %ebp again.
9574 This leaves us with a pristine state. */
9575 emit_insn (gen_pop (hard_frame_pointer_rtx));
9579 /* The first insn of a function that accepts its static chain on the
9580 stack is to push the register that would be filled in by a direct
9581 call. This insn will be skipped by the trampoline. */
9582 else if (ix86_static_chain_on_stack)
9584 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9585 emit_insn (gen_blockage ());
9587 /* We don't want to interpret this push insn as a register save,
9588 only as a stack adjustment. The real copy of the register as
9589 a save will be done later, if needed. */
9590 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9591 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9592 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9593 RTX_FRAME_RELATED_P (insn) = 1;
9596 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9597 DRAP is needed and stack realignment is really needed after reload. */
9598 if (stack_realign_drap)
9600 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9602 /* Only need to push parameter pointer reg if it is caller saved. */
9603 if (!call_used_regs[REGNO (crtl->drap_reg)])
9605 /* Push arg pointer reg */
9606 insn = emit_insn (gen_push (crtl->drap_reg));
9607 RTX_FRAME_RELATED_P (insn) = 1;
9610 /* Grab the argument pointer. */
9611 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9612 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9613 RTX_FRAME_RELATED_P (insn) = 1;
9614 m->fs.cfa_reg = crtl->drap_reg;
9615 m->fs.cfa_offset = 0;
9617 /* Align the stack. */
9618 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9620 GEN_INT (-align_bytes)));
9621 RTX_FRAME_RELATED_P (insn) = 1;
9623 /* Replicate the return address on the stack so that the return
9624 address can be reached via the (argp - 1) slot. This is needed
9625 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9626 expand_builtin_return_addr, etc. */
9627 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9628 t = gen_frame_mem (Pmode, t);
9629 insn = emit_insn (gen_push (t));
9630 RTX_FRAME_RELATED_P (insn) = 1;
9632 /* For the purposes of frame and register save area addressing,
9633 we've started over with a new frame. */
9634 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9635 m->fs.realigned = true;
9638 if (frame_pointer_needed && !m->fs.fp_valid)
9640 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9641 slower on all targets. Also sdb doesn't like it. */
9642 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9643 RTX_FRAME_RELATED_P (insn) = 1;
9645 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9646 RTX_FRAME_RELATED_P (insn) = 1;
9648 if (m->fs.cfa_reg == stack_pointer_rtx)
9649 m->fs.cfa_reg = hard_frame_pointer_rtx;
9650 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9651 m->fs.fp_offset = m->fs.sp_offset;
9652 m->fs.fp_valid = true;
9655 int_registers_saved = (frame.nregs == 0);
9657 if (!int_registers_saved)
9659 /* If saving registers via PUSH, do so now. */
9660 if (!frame.save_regs_using_mov)
9662 ix86_emit_save_regs ();
9663 int_registers_saved = true;
9664 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9667 /* When using the red zone we may start register saving before allocating
9668 the stack frame, saving one cycle of the prologue. However, avoid
9669 doing this if we have to probe the stack; at least on x86_64 the
9670 stack probe can turn into a call that clobbers a red zone location. */
9671 else if (ix86_using_red_zone ()
9672 && (! TARGET_STACK_PROBE
9673 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9675 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9676 int_registers_saved = true;
9680 if (stack_realign_fp)
9682 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9683 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9685 /* The computation of the size of the re-aligned stack frame means
9686 that we must allocate the size of the register save area before
9687 performing the actual alignment. Otherwise we cannot guarantee
9688 that there's enough storage above the realignment point. */
9689 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9690 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9691 GEN_INT (m->fs.sp_offset
9692 - frame.sse_reg_save_offset),
9695 /* Align the stack. */
9696 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9698 GEN_INT (-align_bytes)));
9700 /* For the purposes of register save area addressing, the stack
9701 pointer is no longer valid. As for the value of sp_offset,
9702 see ix86_compute_frame_layout, which we need to match in order
9703 to pass verification of stack_pointer_offset at the end. */
9704 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9705 m->fs.sp_valid = false;
9708 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9710 if (flag_stack_usage)
9712 /* We start to count from ARG_POINTER. */
9713 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9715 /* If it was realigned, take into account the fake frame. */
9716 if (stack_realign_drap)
9718 if (ix86_static_chain_on_stack)
9719 stack_size += UNITS_PER_WORD;
9721 if (!call_used_regs[REGNO (crtl->drap_reg)])
9722 stack_size += UNITS_PER_WORD;
9724 /* This over-estimates by 1 minimal-stack-alignment-unit but
9725 mitigates that by counting in the new return address slot. */
9726 current_function_dynamic_stack_size
9727 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9730 current_function_static_stack_size = stack_size;
9733 /* The stack has already been decremented by the instruction calling us
9734 so we need to probe unconditionally to preserve the protection area. */
9735 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9737 /* We expect the registers to be saved when probes are used. */
9738 gcc_assert (int_registers_saved);
9740 if (STACK_CHECK_MOVING_SP)
9742 ix86_adjust_stack_and_probe (allocate);
9747 HOST_WIDE_INT size = allocate;
9749 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9750 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9752 if (TARGET_STACK_PROBE)
9753 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9755 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9761 else if (!ix86_target_stack_probe ()
9762 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9764 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9765 GEN_INT (-allocate), -1,
9766 m->fs.cfa_reg == stack_pointer_rtx);
9770 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9772 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
9774 bool eax_live = false;
9775 bool r10_live = false;
9778 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
9779 if (!TARGET_64BIT_MS_ABI)
9780 eax_live = ix86_eax_live_at_start_p ();
9784 emit_insn (gen_push (eax));
9785 allocate -= UNITS_PER_WORD;
9789 r10 = gen_rtx_REG (Pmode, R10_REG);
9790 emit_insn (gen_push (r10));
9791 allocate -= UNITS_PER_WORD;
9794 emit_move_insn (eax, GEN_INT (allocate));
9795 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9797 /* Use the fact that AX still contains ALLOCATE. */
9798 adjust_stack_insn = (TARGET_64BIT
9799 ? gen_pro_epilogue_adjust_stack_di_sub
9800 : gen_pro_epilogue_adjust_stack_si_sub);
9802 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
9803 stack_pointer_rtx, eax));
9805 if (m->fs.cfa_reg == stack_pointer_rtx)
9807 m->fs.cfa_offset += allocate;
9809 RTX_FRAME_RELATED_P (insn) = 1;
9810 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9811 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9812 plus_constant (stack_pointer_rtx,
9815 m->fs.sp_offset += allocate;
9817 if (r10_live && eax_live)
9819 t = choose_baseaddr (m->fs.sp_offset - allocate);
9820 emit_move_insn (r10, gen_frame_mem (Pmode, t));
9821 t = choose_baseaddr (m->fs.sp_offset - allocate - UNITS_PER_WORD);
9822 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9824 else if (eax_live || r10_live)
9826 t = choose_baseaddr (m->fs.sp_offset - allocate);
9827 emit_move_insn ((eax_live ? eax : r10), gen_frame_mem (Pmode, t));
9830 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9832 if (!int_registers_saved)
9833 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9835 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9837 pic_reg_used = false;
9838 if (pic_offset_table_rtx
9839 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9842 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9844 if (alt_pic_reg_used != INVALID_REGNUM)
9845 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9847 pic_reg_used = true;
9854 if (ix86_cmodel == CM_LARGE_PIC)
9856 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9857 rtx label = gen_label_rtx ();
9859 LABEL_PRESERVE_P (label) = 1;
9860 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9861 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9862 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9863 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9864 pic_offset_table_rtx, tmp_reg));
9867 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9870 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9873 /* In the pic_reg_used case, make sure that the got load isn't deleted
9874 when mcount needs it. Blockage to avoid call movement across mcount
9875 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9876 note. */
9877 if (crtl->profile && !flag_fentry && pic_reg_used)
9878 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9880 if (crtl->drap_reg && !crtl->stack_realign_needed)
9882 /* vDRAP is set up, but after reload it turns out stack realignment
9883 isn't necessary; here we emit prologue code to set up DRAP
9884 without the stack realign adjustment. */
9885 t = choose_baseaddr (0);
9886 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9889 /* Prevent instructions from being scheduled into register save push
9890 sequence when access to the redzone area is done through frame pointer.
9891 The offset between the frame pointer and the stack pointer is calculated
9892 relative to the value of the stack pointer at the end of the function
9893 prologue, and moving instructions that access redzone area via frame
9894 pointer inside push sequence violates this assumption. */
9895 if (frame_pointer_needed && frame.red_zone_size)
9896 emit_insn (gen_memory_blockage ());
9898 /* Emit cld instruction if stringops are used in the function. */
9899 if (TARGET_CLD && ix86_current_function_needs_cld)
9900 emit_insn (gen_cld ());
9903 /* Emit code to restore REG using a POP insn. */
9906 ix86_emit_restore_reg_using_pop (rtx reg)
9908 struct machine_function *m = cfun->machine;
9909 rtx insn = emit_insn (gen_pop (reg));
9911 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9912 m->fs.sp_offset -= UNITS_PER_WORD;
9914 if (m->fs.cfa_reg == crtl->drap_reg
9915 && REGNO (reg) == REGNO (crtl->drap_reg))
9917 /* Previously we'd represented the CFA as an expression
9918 like *(%ebp - 8). We've just popped that value from
9919 the stack, which means we need to reset the CFA to
9920 the drap register. This will remain until we restore
9921 the stack pointer. */
9922 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9923 RTX_FRAME_RELATED_P (insn) = 1;
9925 /* This means that the DRAP register is valid for addressing too. */
9926 m->fs.drap_valid = true;
9930 if (m->fs.cfa_reg == stack_pointer_rtx)
9932 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9933 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9934 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9935 RTX_FRAME_RELATED_P (insn) = 1;
9937 m->fs.cfa_offset -= UNITS_PER_WORD;
9940 /* When the frame pointer is the CFA, and we pop it, we are
9941 swapping back to the stack pointer as the CFA. This happens
9942 for stack frames that don't allocate other data, so we assume
9943 the stack pointer is now pointing at the return address, i.e.
9944 the function entry state, which makes the offset one word. */
9945 if (reg == hard_frame_pointer_rtx)
9947 m->fs.fp_valid = false;
9948 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9950 m->fs.cfa_reg = stack_pointer_rtx;
9951 m->fs.cfa_offset -= UNITS_PER_WORD;
9953 add_reg_note (insn, REG_CFA_DEF_CFA,
9954 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9955 GEN_INT (m->fs.cfa_offset)));
9956 RTX_FRAME_RELATED_P (insn) = 1;
9961 /* Emit code to restore saved registers using POP insns. */
9964 ix86_emit_restore_regs_using_pop (void)
9968 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9969 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9970 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9973 /* Emit code and notes for the LEAVE instruction. */
9976 ix86_emit_leave (void)
9978 struct machine_function *m = cfun->machine;
9979 rtx insn = emit_insn (ix86_gen_leave ());
9981 ix86_add_queued_cfa_restore_notes (insn);
9983 gcc_assert (m->fs.fp_valid);
9984 m->fs.sp_valid = true;
9985 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9986 m->fs.fp_valid = false;
9988 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9990 m->fs.cfa_reg = stack_pointer_rtx;
9991 m->fs.cfa_offset = m->fs.sp_offset;
9993 add_reg_note (insn, REG_CFA_DEF_CFA,
9994 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9995 RTX_FRAME_RELATED_P (insn) = 1;
9996 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
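/* Illustrative note: "leave" is equivalent to "movl %ebp, %esp; popl %ebp",
   which is why sp becomes valid again at fp_offset - UNITS_PER_WORD and
   the frame pointer ceases to be a valid base register. */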
10001 /* Emit code to restore saved registers using MOV insns.
10002 First register is restored from CFA - CFA_OFFSET. */
10004 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
10005 int maybe_eh_return)
10007 struct machine_function *m = cfun->machine;
10008 unsigned int regno;
10010 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10011 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10013 rtx reg = gen_rtx_REG (Pmode, regno);
10016 mem = choose_baseaddr (cfa_offset);
10017 mem = gen_frame_mem (Pmode, mem);
10018 insn = emit_move_insn (reg, mem);
10020 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
10022 /* Previously we'd represented the CFA as an expression
10023 like *(%ebp - 8). We've just popped that value from
10024 the stack, which means we need to reset the CFA to
10025 the drap register. This will remain until we restore
10026 the stack pointer. */
10027 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
10028 RTX_FRAME_RELATED_P (insn) = 1;
10030 /* This means that the DRAP register is valid for addressing. */
10031 m->fs.drap_valid = true;
10034 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10036 cfa_offset -= UNITS_PER_WORD;
10040 /* Emit code to restore saved registers using MOV insns.
10041 First register is restored from CFA - CFA_OFFSET. */
10043 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
10044 int maybe_eh_return)
10046 unsigned int regno;
10048 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10049 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
10051 rtx reg = gen_rtx_REG (V4SFmode, regno);
10054 mem = choose_baseaddr (cfa_offset);
10055 mem = gen_rtx_MEM (V4SFmode, mem);
10056 set_mem_align (mem, 128);
10057 emit_move_insn (reg, mem);
10059 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
10065 /* Restore function stack, frame, and registers. */
10068 ix86_expand_epilogue (int style)
10070 struct machine_function *m = cfun->machine;
10071 struct machine_frame_state frame_state_save = m->fs;
10072 struct ix86_frame frame;
10073 bool restore_regs_via_mov;
10076 ix86_finalize_stack_realign_flags ();
10077 ix86_compute_frame_layout (&frame);
10079 m->fs.sp_valid = (!frame_pointer_needed
10080 || (current_function_sp_is_unchanging
10081 && !stack_realign_fp));
10082 gcc_assert (!m->fs.sp_valid
10083 || m->fs.sp_offset == frame.stack_pointer_offset);
10085 /* The FP must be valid if the frame pointer is present. */
10086 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
10087 gcc_assert (!m->fs.fp_valid
10088 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
10090 /* We must have *some* valid pointer to the stack frame. */
10091 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
10093 /* The DRAP is never valid at this point. */
10094 gcc_assert (!m->fs.drap_valid);
10096 /* See the comment about red zone and frame
10097 pointer usage in ix86_expand_prologue. */
10098 if (frame_pointer_needed && frame.red_zone_size)
10099 emit_insn (gen_memory_blockage ());
10101 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
10102 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
10104 /* Determine the CFA offset of the end of the red-zone. */
10105 m->fs.red_zone_offset = 0;
10106 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
10108 /* The red-zone begins below the return address. */
10109 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10111 /* When the register save area is in the aligned portion of
10112 the stack, determine the maximum runtime displacement that
10113 matches up with the aligned frame. */
10114 if (stack_realign_drap)
10115 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10119 /* Special care must be taken for the normal return case of a function
10120 using eh_return: the eax and edx registers are marked as saved, but
10121 not restored along this path. Adjust the save location to match. */
10122 if (crtl->calls_eh_return && style != 2)
10123 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10125 /* If we're only restoring one register and sp is not valid, then
10126 use a move instruction to restore the register, since it's
10127 less work than reloading sp and popping the register. */
10128 if (!m->fs.sp_valid && frame.nregs <= 1)
10129 restore_regs_via_mov = true;
10130 /* EH_RETURN requires the use of moves to function properly. */
10131 else if (crtl->calls_eh_return)
10132 restore_regs_via_mov = true;
10133 else if (TARGET_EPILOGUE_USING_MOVE
10134 && cfun->machine->use_fast_prologue_epilogue
10135 && (frame.nregs > 1
10136 || m->fs.sp_offset != frame.reg_save_offset))
10137 restore_regs_via_mov = true;
10138 else if (frame_pointer_needed
10139 && !frame.nregs
10140 && m->fs.sp_offset != frame.reg_save_offset)
10141 restore_regs_via_mov = true;
10142 else if (frame_pointer_needed
10143 && TARGET_USE_LEAVE
10144 && cfun->machine->use_fast_prologue_epilogue
10145 && frame.nregs == 1)
10146 restore_regs_via_mov = true;
10147 else
10148 restore_regs_via_mov = false;
10150 if (restore_regs_via_mov || frame.nsseregs)
10152 /* Ensure that the entire register save area is addressable via
10153 the stack pointer, if we will restore via sp. */
10154 if (TARGET_64BIT
10155 && m->fs.sp_offset > 0x7fffffff
10156 && !(m->fs.fp_valid || m->fs.drap_valid)
10157 && (frame.nsseregs + frame.nregs) != 0)
10159 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10160 GEN_INT (m->fs.sp_offset
10161 - frame.sse_reg_save_offset),
10162 style,
10163 m->fs.cfa_reg == stack_pointer_rtx);
10167 /* If there are any SSE registers to restore, then we have to do it
10168 via moves, since there's obviously no pop for SSE regs. */
10169 if (frame.nsseregs)
10170 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10171 style == 2);
10173 if (restore_regs_via_mov)
10178 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10180 /* eh_return epilogues need %ecx added to the stack pointer. */
10181 if (style == 2)
10182 {
10183 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10184 rtx t;
10185 /* Stack align doesn't work with eh_return. */
10186 gcc_assert (!stack_realign_drap);
10187 /* Neither do regparm nested functions. */
10188 gcc_assert (!ix86_static_chain_on_stack);
10190 if (frame_pointer_needed)
10192 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10193 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10194 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10196 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10197 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10199 /* Note that we use SA as a temporary CFA, as the return
10200 address is at the proper place relative to it. We
10201 pretend this happens at the FP restore insn because
10202 prior to this insn the FP would be stored at the wrong
10203 offset relative to SA, and after this insn we have no
10204 other reasonable register to use for the CFA. We don't
10205 bother resetting the CFA to the SP for the duration of
10206 the return insn. */
10207 add_reg_note (insn, REG_CFA_DEF_CFA,
10208 plus_constant (sa, UNITS_PER_WORD));
10209 ix86_add_queued_cfa_restore_notes (insn);
10210 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10211 RTX_FRAME_RELATED_P (insn) = 1;
10213 m->fs.cfa_reg = sa;
10214 m->fs.cfa_offset = UNITS_PER_WORD;
10215 m->fs.fp_valid = false;
10217 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10218 const0_rtx, style, false);
10222 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10223 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10224 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10225 ix86_add_queued_cfa_restore_notes (insn);
10227 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10228 if (m->fs.cfa_offset != UNITS_PER_WORD)
10230 m->fs.cfa_offset = UNITS_PER_WORD;
10231 add_reg_note (insn, REG_CFA_DEF_CFA,
10232 plus_constant (stack_pointer_rtx,
10233 UNITS_PER_WORD));
10234 RTX_FRAME_RELATED_P (insn) = 1;
10237 m->fs.sp_offset = UNITS_PER_WORD;
10238 m->fs.sp_valid = true;
10243 /* First step is to deallocate the stack frame so that we can
10244 pop the registers. */
10245 if (!m->fs.sp_valid)
10247 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10248 GEN_INT (m->fs.fp_offset
10249 - frame.reg_save_offset),
10250 style, false);
10252 else if (m->fs.sp_offset != frame.reg_save_offset)
10254 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10255 GEN_INT (m->fs.sp_offset
10256 - frame.reg_save_offset),
10257 style,
10258 m->fs.cfa_reg == stack_pointer_rtx);
10261 ix86_emit_restore_regs_using_pop ();
10264 /* If we used a frame pointer and haven't already got rid of it,
10265 then do so now. */
10266 if (m->fs.fp_valid)
10268 /* If the stack pointer is valid and pointing at the frame
10269 pointer store address, then we only need a pop. */
10270 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10271 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10272 /* LEAVE yields shorter dependency chains on CPUs that are
10273 able to execute it fast. */
10274 else if (TARGET_USE_LEAVE
10275 || optimize_function_for_size_p (cfun)
10276 || !cfun->machine->use_fast_prologue_epilogue)
10277 ix86_emit_leave ();
10280 pro_epilogue_adjust_stack (stack_pointer_rtx,
10281 hard_frame_pointer_rtx,
10282 const0_rtx, style, !using_drap);
10283 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10287 if (using_drap)
10288 {
10289 int param_ptr_offset = UNITS_PER_WORD;
10290 rtx insn;
10292 gcc_assert (stack_realign_drap);
10294 if (ix86_static_chain_on_stack)
10295 param_ptr_offset += UNITS_PER_WORD;
10296 if (!call_used_regs[REGNO (crtl->drap_reg)])
10297 param_ptr_offset += UNITS_PER_WORD;
10299 insn = emit_insn (gen_rtx_SET
10300 (VOIDmode, stack_pointer_rtx,
10301 gen_rtx_PLUS (Pmode,
10302 crtl->drap_reg,
10303 GEN_INT (-param_ptr_offset))));
10304 m->fs.cfa_reg = stack_pointer_rtx;
10305 m->fs.cfa_offset = param_ptr_offset;
10306 m->fs.sp_offset = param_ptr_offset;
10307 m->fs.realigned = false;
10309 add_reg_note (insn, REG_CFA_DEF_CFA,
10310 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10311 GEN_INT (param_ptr_offset)));
10312 RTX_FRAME_RELATED_P (insn) = 1;
10314 if (!call_used_regs[REGNO (crtl->drap_reg)])
10315 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10318 /* At this point the stack pointer must be valid, and we must have
10319 restored all of the registers. We may not have deallocated the
10320 entire stack frame. We've delayed this until now because it may
10321 be possible to merge the local stack deallocation with the
10322 deallocation forced by ix86_static_chain_on_stack. */
10323 gcc_assert (m->fs.sp_valid);
10324 gcc_assert (!m->fs.fp_valid);
10325 gcc_assert (!m->fs.realigned);
10326 if (m->fs.sp_offset != UNITS_PER_WORD)
10328 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10329 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10330 style, true);
10333 /* Sibcall epilogues don't want a return instruction. */
10334 if (style == 0)
10335 {
10336 m->fs = frame_state_save;
10337 return;
10338 }
10340 if (crtl->args.pops_args && crtl->args.size)
10342 rtx popc = GEN_INT (crtl->args.pops_args);
10344 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10345 address, do explicit add, and jump indirectly to the caller. */
10347 if (crtl->args.pops_args >= 65536)
10349 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10352 /* There is no "pascal" calling convention in any 64bit ABI. */
10353 gcc_assert (!TARGET_64BIT);
10355 insn = emit_insn (gen_pop (ecx));
10356 m->fs.cfa_offset -= UNITS_PER_WORD;
10357 m->fs.sp_offset -= UNITS_PER_WORD;
10359 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10360 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10361 add_reg_note (insn, REG_CFA_REGISTER,
10362 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10363 RTX_FRAME_RELATED_P (insn) = 1;
10365 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10367 emit_jump_insn (gen_return_indirect_internal (ecx));
10370 emit_jump_insn (gen_return_pop_internal (popc));
10373 emit_jump_insn (gen_return_internal ());
10375 /* Restore the state back to the state from the prologue,
10376 so that it's correct for the next epilogue. */
10377 m->fs = frame_state_save;
10380 /* Reset from the function's potential modifications. */
10382 static void
10383 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10384 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10386 if (pic_offset_table_rtx)
10387 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10389 /* Mach-O doesn't support labels at the end of objects, so if
10390 it looks like we might want one, insert a NOP. */
10392 rtx insn = get_last_insn ();
10395 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10396 insn = PREV_INSN (insn);
10400 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10401 fputs ("\tnop\n", file);
10407 /* Return a scratch register to use in the split stack prologue. The
10408 split stack prologue is used for -fsplit-stack. It is the first
10409 instructions in the function, even before the regular prologue.
10410 The scratch register can be any caller-saved register which is not
10411 used for parameters or for the static chain. */
10413 static unsigned int
10414 split_stack_prologue_scratch_regno (void)
10423 is_fastcall = (lookup_attribute ("fastcall",
10424 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
10425 != NULL);
10426 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
10430 if (DECL_STATIC_CHAIN (cfun->decl))
10432 sorry ("-fsplit-stack does not support fastcall with "
10433 "nested function");
10434 return INVALID_REGNUM;
10438 else if (regparm < 3)
10440 if (!DECL_STATIC_CHAIN (cfun->decl))
10446 sorry ("-fsplit-stack does not support 2 register "
10447 "parameters for a nested function");
10448 return INVALID_REGNUM;
10455 /* FIXME: We could make this work by pushing a register
10456 around the addition and comparison. */
10457 sorry ("-fsplit-stack does not support 3 register parameters");
10458 return INVALID_REGNUM;
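/* In outline (a descriptive sketch based on the IA-32 calling
   conventions, not normative): a fastcall function has %ecx and %edx
   taken by arguments, leaving %eax; otherwise %ecx is the usual choice,
   with %edx as the fallback when the static chain occupies %ecx; with
   three register parameters every caller-saved candidate is taken,
   hence the sorry above.  */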
10463 /* A SYMBOL_REF for the function which allocates new stack space for
10464 -fsplit-stack. */
10466 static GTY(()) rtx split_stack_fn;
10468 /* Handle -fsplit-stack. These are the first instructions in the
10469 function, even before the regular prologue. */
10472 ix86_expand_split_stack_prologue (void)
10474 struct ix86_frame frame;
10475 HOST_WIDE_INT allocate;
10477 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
10478 rtx scratch_reg = NULL_RTX;
10479 rtx varargs_label = NULL_RTX;
10481 gcc_assert (flag_split_stack && reload_completed);
10483 ix86_finalize_stack_realign_flags ();
10484 ix86_compute_frame_layout (&frame);
10485 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
10487 /* This is the label we will branch to if we have enough stack
10488 space. We expect the basic block reordering pass to reverse this
10489 branch if optimizing, so that we branch in the unlikely case. */
10490 label = gen_label_rtx ();
10492 /* We need to compare the stack pointer minus the frame size with
10493 the stack boundary in the TCB. The stack boundary always gives
10494 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
10495 can compare directly. Otherwise we need to do an addition. */
10497 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
10498 UNSPEC_STACK_CHECK);
10499 limit = gen_rtx_CONST (Pmode, limit);
10500 limit = gen_rtx_MEM (Pmode, limit);
10501 if (allocate < SPLIT_STACK_AVAILABLE)
10502 current = stack_pointer_rtx;
10505 unsigned int scratch_regno;
10508 /* We need a scratch register to hold the stack pointer minus
10509 the required frame size. Since this is the very start of the
10510 function, the scratch register can be any caller-saved
10511 register which is not used for parameters. */
10512 offset = GEN_INT (- allocate);
10513 scratch_regno = split_stack_prologue_scratch_regno ();
10514 if (scratch_regno == INVALID_REGNUM)
10516 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10517 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
10519 /* We don't use ix86_gen_add3 in this case because it will
10520 want to split to lea, but when not optimizing the insn
10521 will not be split after this point. */
10522 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10523 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10528 emit_move_insn (scratch_reg, offset);
10529 emit_insn (gen_adddi3 (scratch_reg, scratch_reg,
10530 stack_pointer_rtx));
10532 current = scratch_reg;
10535 ix86_expand_branch (GEU, current, limit, label);
10536 jump_insn = get_last_insn ();
10537 JUMP_LABEL (jump_insn) = label;
10539 /* Mark the jump as very likely to be taken. */
10540 add_reg_note (jump_insn, REG_BR_PROB,
10541 GEN_INT (REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100));
10543 /* Get more stack space. We pass in the desired stack space and the
10544 size of the arguments to copy to the new stack. In 32-bit mode
10545 we push the parameters; __morestack will return on a new stack
10546 anyhow. In 64-bit mode we pass the parameters in r10 and
10547 r11. */
10548 allocate_rtx = GEN_INT (allocate);
10549 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
10550 call_fusage = NULL_RTX;
10555 reg = gen_rtx_REG (Pmode, R10_REG);
10557 /* If this function uses a static chain, it will be in %r10.
10558 Preserve it across the call to __morestack. */
10559 if (DECL_STATIC_CHAIN (cfun->decl))
10563 rax = gen_rtx_REG (Pmode, AX_REG);
10564 emit_move_insn (rax, reg);
10565 use_reg (&call_fusage, rax);
10568 emit_move_insn (reg, allocate_rtx);
10569 use_reg (&call_fusage, reg);
10570 reg = gen_rtx_REG (Pmode, R11_REG);
10571 emit_move_insn (reg, GEN_INT (args_size));
10572 use_reg (&call_fusage, reg);
10576 emit_insn (gen_push (GEN_INT (args_size)));
10577 emit_insn (gen_push (allocate_rtx));
10579 if (split_stack_fn == NULL_RTX)
10580 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
10581 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, split_stack_fn),
10582 GEN_INT (UNITS_PER_WORD), constm1_rtx,
10584 add_function_usage_to (call_insn, call_fusage);
10586 /* In order to make call/return prediction work right, we now need
10587 to execute a return instruction. See
10588 libgcc/config/i386/morestack.S for the details on how this works.
10590 For flow purposes gcc must not see this as a return
10591 instruction--we need control flow to continue at the subsequent
10592 label. Therefore, we use an unspec. */
10593 gcc_assert (crtl->args.pops_args < 65536);
10594 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
10596 /* If we are in 64-bit mode and this function uses a static chain,
10597 we saved %r10 in %rax before calling __morestack. */
10598 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
10599 emit_move_insn (gen_rtx_REG (Pmode, R10_REG),
10600 gen_rtx_REG (Pmode, AX_REG));
10602 /* If this function calls va_start, we need to store a pointer to
10603 the arguments on the old stack, because they may not have been
10604 all copied to the new stack. At this point the old stack can be
10605 found at the frame pointer value used by __morestack, because
10606 __morestack has set that up before calling back to us. Here we
10607 store that pointer in a scratch register, and in
10608 ix86_expand_prologue we store the scratch register in a stack
10609 slot. */
10610 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10612 unsigned int scratch_regno;
10616 scratch_regno = split_stack_prologue_scratch_regno ();
10617 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
10618 frame_reg = gen_rtx_REG (Pmode, BP_REG);
10620 /* 64-bit:
10621 fp -> old fp
10622 return address within this function
10623 return address of caller of this function
10624 stack arguments
10625 So we add three words to get to the stack arguments.
10626
10627 32-bit:
10628 fp -> old fp
10629 return address within this function
10630 first argument to __morestack
10631 second argument to __morestack
10632 return address of caller of this function
10633 stack arguments
10634 So we add five words to get to the stack arguments. */
10636 words = TARGET_64BIT ? 3 : 5;
10637 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10638 gen_rtx_PLUS (Pmode, frame_reg,
10639 GEN_INT (words * UNITS_PER_WORD))));
10641 varargs_label = gen_label_rtx ();
10642 emit_jump_insn (gen_jump (varargs_label));
10643 JUMP_LABEL (get_last_insn ()) = varargs_label;
10648 emit_label (label);
10649 LABEL_NUSES (label) = 1;
10651 /* If this function calls va_start, we now have to set the scratch
10652 register for the case where we do not call __morestack. In this
10653 case we need to set it based on the stack pointer. */
10654 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10656 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
10657 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10658 GEN_INT (UNITS_PER_WORD))));
10660 emit_label (varargs_label);
10661 LABEL_NUSES (varargs_label) = 1;
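/* An illustrative sketch of the 32-bit code built above (the TCB slot
   offset and label numbering are assumptions, not what
   UNSPEC_STACK_CHECK necessarily expands to):

	cmpl	%gs:0x30, %esp		# stack limit from the TCB
	jae	1f			# enough space: skip __morestack
	pushl	$args_size		# second argument to __morestack
	pushl	$allocate		# first argument to __morestack
	call	__morestack
	ret				# the split_stack_return unspec
   1:

   When ALLOCATE >= SPLIT_STACK_AVAILABLE, a scratch register holding
   %esp - ALLOCATE is compared against the limit instead of %esp.  */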
10665 /* We may have to tell the dataflow pass that the split stack prologue
10666 is initializing a scratch register. */
10668 static void
10669 ix86_live_on_entry (bitmap regs)
10670 {
10671 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
10673 gcc_assert (flag_split_stack);
10674 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
10678 /* Extract the parts of an RTL expression that is a valid memory address
10679 for an instruction. Return 0 if the structure of the address is
10680 grossly off. Return -1 if the address contains ASHIFT, so it is not
10681 strictly valid, but is still used for computing the length of the lea instruction. */
10683 int
10684 ix86_decompose_address (rtx addr, struct ix86_address *out)
10685 {
10686 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10687 rtx base_reg, index_reg;
10688 HOST_WIDE_INT scale = 1;
10689 rtx scale_rtx = NULL_RTX;
10692 enum ix86_address_seg seg = SEG_DEFAULT;
10694 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10696 else if (GET_CODE (addr) == PLUS)
10698 rtx addends[4], op;
10706 addends[n++] = XEXP (op, 1);
10709 while (GET_CODE (op) == PLUS);
10714 for (i = n; i >= 0; --i)
10717 switch (GET_CODE (op))
10722 index = XEXP (op, 0);
10723 scale_rtx = XEXP (op, 1);
10729 index = XEXP (op, 0);
10730 tmp = XEXP (op, 1);
10731 if (!CONST_INT_P (tmp))
10732 return 0;
10733 scale = INTVAL (tmp);
10734 if ((unsigned HOST_WIDE_INT) scale > 3)
10735 return 0;
10736 scale = 1 << scale;
10740 if (XINT (op, 1) == UNSPEC_TP
10741 && TARGET_TLS_DIRECT_SEG_REFS
10742 && seg == SEG_DEFAULT)
10743 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10772 else if (GET_CODE (addr) == MULT)
10774 index = XEXP (addr, 0); /* index*scale */
10775 scale_rtx = XEXP (addr, 1);
10777 else if (GET_CODE (addr) == ASHIFT)
10779 /* We're called for lea too, which implements ashift on occasion. */
10780 index = XEXP (addr, 0);
10781 tmp = XEXP (addr, 1);
10782 if (!CONST_INT_P (tmp))
10783 return 0;
10784 scale = INTVAL (tmp);
10785 if ((unsigned HOST_WIDE_INT) scale > 3)
10786 return 0;
10787 scale = 1 << scale;
10791 disp = addr; /* displacement */
10793 /* Extract the integral value of scale. */
10794 if (scale_rtx)
10795 {
10796 if (!CONST_INT_P (scale_rtx))
10797 return 0;
10798 scale = INTVAL (scale_rtx);
10799 }
10801 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10802 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10804 /* Avoid useless 0 displacement. */
10805 if (disp == const0_rtx && (base || index))
10808 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10809 if (base_reg && index_reg && scale == 1
10810 && (index_reg == arg_pointer_rtx
10811 || index_reg == frame_pointer_rtx
10812 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10815 tmp = base, base = index, index = tmp;
10816 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10819 /* Special case: %ebp cannot be encoded as a base without a displacement.
10820 Similarly %r13. */
10821 if (!disp
10822 && base_reg
10823 && (base_reg == hard_frame_pointer_rtx
10824 || base_reg == frame_pointer_rtx
10825 || base_reg == arg_pointer_rtx
10826 || (REG_P (base_reg)
10827 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10828 || REGNO (base_reg) == R13_REG))))
10831 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10832 Avoid this by transforming to [%esi+0].
10833 Reload calls address legitimization without cfun defined, so we need
10834 to test cfun for being non-NULL. */
10835 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10836 && base_reg && !index_reg && !disp
10837 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10840 /* Special case: encode reg+reg instead of reg*2. */
10841 if (!base && index && scale == 2)
10842 base = index, base_reg = index_reg, scale = 1;
10844 /* Special case: scaling cannot be encoded without base or displacement. */
10845 if (!base && !disp && index && scale != 1)
10849 out->index = index;
10851 out->scale = scale;
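/* A worked example: the address

	(plus (mult (reg B) (const_int 4))
	      (plus (reg A) (const_int 12)))

   decomposes into base = A, index = B, scale = 4, disp = 12, i.e. the
   operand written as 12(%eax,%ebx,4) in AT&T syntax (register choices
   illustrative).  */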
10857 /* Return cost of the memory address x.
10858 For i386, it is better to use a complex address than let gcc copy
10859 the address into a reg and make a new pseudo. But not if the address
10860 requires two regs - that would mean more pseudos with longer
10861 lifetimes. */
10862 static int
10863 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10865 struct ix86_address parts;
10867 int ok = ix86_decompose_address (x, &parts);
10871 if (parts.base && GET_CODE (parts.base) == SUBREG)
10872 parts.base = SUBREG_REG (parts.base);
10873 if (parts.index && GET_CODE (parts.index) == SUBREG)
10874 parts.index = SUBREG_REG (parts.index);
10876 /* Attempt to minimize number of registers in the address. */
10878 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10880 && (!REG_P (parts.index)
10881 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10885 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10887 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10888 && parts.base != parts.index)
10891 /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10892 since its predecode logic can't detect the length of instructions
10893 and it degenerates to vector decoding. Increase the cost of such
10894 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10895 to split such addresses or even refuse such addresses at all.
10897 The following addressing modes are affected:
10898 [base+scale*index]
10899 [scale*index+disp]
10900 [base+index]
10902 The first and last case may be avoidable by explicitly coding the zero in
10903 a memory address, but I don't have an AMD-K6 machine handy to check this
10904 theory. */
10907 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10908 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10909 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10915 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10916 this is used to form addresses to local data when -fPIC is in
10917 effect. */
10920 darwin_local_data_pic (rtx disp)
10922 return (GET_CODE (disp) == UNSPEC
10923 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10926 /* Determine if a given RTX is a valid constant. We already know this
10927 satisfies CONSTANT_P. */
10930 legitimate_constant_p (rtx x)
10932 switch (GET_CODE (x))
10937 if (GET_CODE (x) == PLUS)
10939 if (!CONST_INT_P (XEXP (x, 1)))
10940 return false;
10941 x = XEXP (x, 0);
10944 if (TARGET_MACHO && darwin_local_data_pic (x))
10947 /* Only some unspecs are valid as "constants". */
10948 if (GET_CODE (x) == UNSPEC)
10949 switch (XINT (x, 1))
10952 case UNSPEC_GOTOFF:
10953 case UNSPEC_PLTOFF:
10954 return TARGET_64BIT;
10956 case UNSPEC_NTPOFF:
10957 x = XVECEXP (x, 0, 0);
10958 return (GET_CODE (x) == SYMBOL_REF
10959 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10960 case UNSPEC_DTPOFF:
10961 x = XVECEXP (x, 0, 0);
10962 return (GET_CODE (x) == SYMBOL_REF
10963 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10968 /* We must have drilled down to a symbol. */
10969 if (GET_CODE (x) == LABEL_REF)
10971 if (GET_CODE (x) != SYMBOL_REF)
10976 /* TLS symbols are never valid. */
10977 if (SYMBOL_REF_TLS_MODEL (x))
10980 /* DLLIMPORT symbols are never valid. */
10981 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10982 && SYMBOL_REF_DLLIMPORT_P (x))
10987 if (GET_MODE (x) == TImode
10988 && x != CONST0_RTX (TImode)
10994 if (!standard_sse_constant_p (x))
11001 /* Otherwise we handle everything else in the move patterns. */
11005 /* Determine if it's legal to put X into the constant pool. This
11006 is not possible for the address of thread-local symbols, which
11007 is checked above. */
11010 ix86_cannot_force_const_mem (rtx x)
11012 /* We can always put integral constants and vectors in memory. */
11013 switch (GET_CODE (x))
11023 return !legitimate_constant_p (x);
11027 /* Nonzero if the constant value X is a legitimate general operand
11028 when generating PIC code. It is given that flag_pic is on and
11029 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
11032 legitimate_pic_operand_p (rtx x)
11036 switch (GET_CODE (x))
11039 inner = XEXP (x, 0);
11040 if (GET_CODE (inner) == PLUS
11041 && CONST_INT_P (XEXP (inner, 1)))
11042 inner = XEXP (inner, 0);
11044 /* Only some unspecs are valid as "constants". */
11045 if (GET_CODE (inner) == UNSPEC)
11046 switch (XINT (inner, 1))
11049 case UNSPEC_GOTOFF:
11050 case UNSPEC_PLTOFF:
11051 return TARGET_64BIT;
11053 x = XVECEXP (inner, 0, 0);
11054 return (GET_CODE (x) == SYMBOL_REF
11055 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
11056 case UNSPEC_MACHOPIC_OFFSET:
11057 return legitimate_pic_address_disp_p (x);
11065 return legitimate_pic_address_disp_p (x);
11072 /* Determine if a given CONST RTX is a valid memory displacement
11076 legitimate_pic_address_disp_p (rtx disp)
11080 /* In 64bit mode we can allow direct addresses of symbols and labels
11081 when they are not dynamic symbols. */
11084 rtx op0 = disp, op1;
11086 switch (GET_CODE (disp))
11092 if (GET_CODE (XEXP (disp, 0)) != PLUS)
11094 op0 = XEXP (XEXP (disp, 0), 0);
11095 op1 = XEXP (XEXP (disp, 0), 1);
11096 if (!CONST_INT_P (op1)
11097 || INTVAL (op1) >= 16*1024*1024
11098 || INTVAL (op1) < -16*1024*1024)
11100 if (GET_CODE (op0) == LABEL_REF)
11102 if (GET_CODE (op0) != SYMBOL_REF)
11107 /* TLS references should always be enclosed in UNSPEC. */
11108 if (SYMBOL_REF_TLS_MODEL (op0))
11110 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
11111 && ix86_cmodel != CM_LARGE_PIC)
11119 if (GET_CODE (disp) != CONST)
11121 disp = XEXP (disp, 0);
11125 /* It is unsafe to allow PLUS expressions here; this limits the allowed
11126 distance of GOT references. We should not need these anyway. */
11127 if (GET_CODE (disp) != UNSPEC
11128 || (XINT (disp, 1) != UNSPEC_GOTPCREL
11129 && XINT (disp, 1) != UNSPEC_GOTOFF
11130 && XINT (disp, 1) != UNSPEC_PLTOFF))
11133 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
11134 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
11140 if (GET_CODE (disp) == PLUS)
11142 if (!CONST_INT_P (XEXP (disp, 1)))
11144 disp = XEXP (disp, 0);
11148 if (TARGET_MACHO && darwin_local_data_pic (disp))
11151 if (GET_CODE (disp) != UNSPEC)
11154 switch (XINT (disp, 1))
11159 /* We need to check for both symbols and labels because VxWorks loads
11160 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
11162 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11163 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
11164 case UNSPEC_GOTOFF:
11165 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
11166 While the ABI also specifies a 32bit relocation, we don't produce
11167 it in the small PIC model at all. */
11168 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
11169 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
11171 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
11173 case UNSPEC_GOTTPOFF:
11174 case UNSPEC_GOTNTPOFF:
11175 case UNSPEC_INDNTPOFF:
11178 disp = XVECEXP (disp, 0, 0);
11179 return (GET_CODE (disp) == SYMBOL_REF
11180 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
11181 case UNSPEC_NTPOFF:
11182 disp = XVECEXP (disp, 0, 0);
11183 return (GET_CODE (disp) == SYMBOL_REF
11184 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
11185 case UNSPEC_DTPOFF:
11186 disp = XVECEXP (disp, 0, 0);
11187 return (GET_CODE (disp) == SYMBOL_REF
11188 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
11194 /* Recognizes RTL expressions that are valid memory addresses for an
11195 instruction. The MODE argument is the machine mode for the MEM
11196 expression that wants to use this address.
11198 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
11199 convert common non-canonical forms to canonical form so that they will
11200 be recognized. */
11201 static bool
11203 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
11204 rtx addr, bool strict)
11206 struct ix86_address parts;
11207 rtx base, index, disp;
11208 HOST_WIDE_INT scale;
11210 if (ix86_decompose_address (addr, &parts) <= 0)
11211 /* Decomposition failed. */
11215 index = parts.index;
11217 scale = parts.scale;
11219 /* Validate base register.
11221 Don't allow SUBREG's that span more than a word here. It can lead to spill
11222 failures when the base is one word out of a two word structure, which is
11223 represented internally as a DImode int. */
11231 else if (GET_CODE (base) == SUBREG
11232 && REG_P (SUBREG_REG (base))
11233 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
11235 reg = SUBREG_REG (base);
11237 /* Base is not a register. */
11240 if (GET_MODE (base) != Pmode)
11241 /* Base is not in Pmode. */
11244 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
11245 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
11246 /* Base is not valid. */
11250 /* Validate index register.
11252 Don't allow SUBREG's that span more than a word here -- same as above. */
11260 else if (GET_CODE (index) == SUBREG
11261 && REG_P (SUBREG_REG (index))
11262 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
11264 reg = SUBREG_REG (index);
11266 /* Index is not a register. */
11269 if (GET_MODE (index) != Pmode)
11270 /* Index is not in Pmode. */
11273 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
11274 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
11275 /* Index is not valid. */
11279 /* Validate scale factor. */
11283 /* Scale without index. */
11286 if (scale != 2 && scale != 4 && scale != 8)
11287 /* Scale is not a valid multiplier. */
11291 /* Validate displacement. */
11294 if (GET_CODE (disp) == CONST
11295 && GET_CODE (XEXP (disp, 0)) == UNSPEC
11296 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
11297 switch (XINT (XEXP (disp, 0), 1))
11299 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
11300 used. While the ABI also specifies 32bit relocations, we don't
11301 produce them at all and use IP relative addressing instead. */
11303 case UNSPEC_GOTOFF:
11304 gcc_assert (flag_pic);
11306 goto is_legitimate_pic;
11308 /* 64bit address unspec. */
11311 case UNSPEC_GOTPCREL:
11312 gcc_assert (flag_pic);
11313 goto is_legitimate_pic;
11315 case UNSPEC_GOTTPOFF:
11316 case UNSPEC_GOTNTPOFF:
11317 case UNSPEC_INDNTPOFF:
11318 case UNSPEC_NTPOFF:
11319 case UNSPEC_DTPOFF:
11322 case UNSPEC_STACK_CHECK:
11323 gcc_assert (flag_split_stack);
11327 /* Invalid address unspec. */
11331 else if (SYMBOLIC_CONST (disp)
11335 && MACHOPIC_INDIRECT
11336 && !machopic_operand_p (disp)
11342 if (TARGET_64BIT && (index || base))
11344 /* foo@dtpoff(%rX) is ok. */
11345 if (GET_CODE (disp) != CONST
11346 || GET_CODE (XEXP (disp, 0)) != PLUS
11347 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
11348 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
11349 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
11350 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
11351 /* Non-constant pic memory reference. */
11354 else if (! legitimate_pic_address_disp_p (disp))
11355 /* Displacement is an invalid pic construct. */
11358 /* This code used to verify that a symbolic pic displacement
11359 includes the pic_offset_table_rtx register.
11361 While this is a good idea, unfortunately these constructs may
11362 be created by "adds using lea" optimization for incorrect
11363 code like:
11365 int a;
11366 int foo (int i)
11367 {
11368 return *(&a + i);
11369 }
11371 This code is nonsensical, but results in addressing
11372 GOT table with pic_offset_table_rtx base. We can't
11373 just refuse it easily, since it gets matched by
11374 "addsi3" pattern, that later gets split to lea in the
11375 case the output register differs from the input. While this
11376 could be handled by a separate addsi pattern for this case
11377 that never results in lea, disabling this test seems to be the
11378 easier and correct fix for the crash. */
11380 else if (GET_CODE (disp) != LABEL_REF
11381 && !CONST_INT_P (disp)
11382 && (GET_CODE (disp) != CONST
11383 || !legitimate_constant_p (disp))
11384 && (GET_CODE (disp) != SYMBOL_REF
11385 || !legitimate_constant_p (disp)))
11386 /* Displacement is not constant. */
11388 else if (TARGET_64BIT
11389 && !x86_64_immediate_operand (disp, VOIDmode))
11390 /* Displacement is out of range. */
11394 /* Everything looks valid. */
11398 /* Determine if a given RTX is a valid constant address. */
11401 constant_address_p (rtx x)
11403 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11406 /* Return a unique alias set for the GOT. */
11408 static alias_set_type
11409 ix86_GOT_alias_set (void)
11411 static alias_set_type set = -1;
11412 if (set == -1)
11413 set = new_alias_set ();
11414 return set;
11415 }
11417 /* Return a legitimate reference for ORIG (an address) using the
11418 register REG. If REG is 0, a new pseudo is generated.
11420 There are two types of references that must be handled:
11422 1. Global data references must load the address from the GOT, via
11423 the PIC reg. An insn is emitted to do this load, and the reg is
11426 2. Static data references, constant pool addresses, and code labels
11427 compute the address as an offset from the GOT, whose base is in
11428 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11429 differentiate them from global data objects. The returned
11430 address is the PIC reg + an unspec constant.
11432 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11433 reg also appears in the address. */
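/* For instance, with the PIC register in %ebx (32-bit, an illustrative
   sketch):

     global data:	movl	sym@GOT(%ebx), %eax
     static data:	leal	sym@GOTOFF(%ebx), %eax

   corresponding to cases 1 and 2 above.  */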
11435 static rtx
11436 legitimize_pic_address (rtx orig, rtx reg)
11437 {
11439 rtx new_rtx = orig;
11443 if (TARGET_MACHO && !TARGET_64BIT)
11445 if (reg == 0)
11446 reg = gen_reg_rtx (Pmode);
11447 /* Use the generic Mach-O PIC machinery. */
11448 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11452 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11453 new_rtx = addr;
11454 else if (TARGET_64BIT
11455 && ix86_cmodel != CM_SMALL_PIC
11456 && gotoff_operand (addr, Pmode))
11459 /* This symbol may be referenced via a displacement from the PIC
11460 base address (@GOTOFF). */
11462 if (reload_in_progress)
11463 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11464 if (GET_CODE (addr) == CONST)
11465 addr = XEXP (addr, 0);
11466 if (GET_CODE (addr) == PLUS)
11468 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11470 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11473 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11474 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11476 tmpreg = gen_reg_rtx (Pmode);
11479 emit_move_insn (tmpreg, new_rtx);
11483 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11484 tmpreg, 1, OPTAB_DIRECT);
11487 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11489 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11491 /* This symbol may be referenced via a displacement from the PIC
11492 base address (@GOTOFF). */
11494 if (reload_in_progress)
11495 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11496 if (GET_CODE (addr) == CONST)
11497 addr = XEXP (addr, 0);
11498 if (GET_CODE (addr) == PLUS)
11500 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11502 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11505 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11506 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11507 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11511 emit_move_insn (reg, new_rtx);
11515 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11516 /* We can't use @GOTOFF for text labels on VxWorks;
11517 see gotoff_operand. */
11518 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11520 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11522 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11523 return legitimize_dllimport_symbol (addr, true);
11524 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11525 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11526 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11528 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11529 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11533 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11535 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11536 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11537 new_rtx = gen_const_mem (Pmode, new_rtx);
11538 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11540 if (reg == 0)
11541 reg = gen_reg_rtx (Pmode);
11542 /* Use gen_movsi directly; otherwise the address is loaded
11543 into a register for CSE. We don't want to CSE these addresses;
11544 instead we CSE addresses from the GOT table, so skip this. */
11545 emit_insn (gen_movsi (reg, new_rtx));
11550 /* This symbol must be referenced via a load from the
11551 Global Offset Table (@GOT). */
11553 if (reload_in_progress)
11554 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11555 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11556 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11558 new_rtx = force_reg (Pmode, new_rtx);
11559 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11560 new_rtx = gen_const_mem (Pmode, new_rtx);
11561 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11563 if (reg == 0)
11564 reg = gen_reg_rtx (Pmode);
11565 emit_move_insn (reg, new_rtx);
11571 if (CONST_INT_P (addr)
11572 && !x86_64_immediate_operand (addr, VOIDmode))
11576 emit_move_insn (reg, addr);
11580 new_rtx = force_reg (Pmode, addr);
11582 else if (GET_CODE (addr) == CONST)
11584 addr = XEXP (addr, 0);
11586 /* We must match stuff we generate before. Assume the only
11587 unspecs that can get here are ours. Not that we could do
11588 anything with them anyway.... */
11589 if (GET_CODE (addr) == UNSPEC
11590 || (GET_CODE (addr) == PLUS
11591 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11593 gcc_assert (GET_CODE (addr) == PLUS);
11595 if (GET_CODE (addr) == PLUS)
11597 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11599 /* Check first to see if this is a constant offset from a @GOTOFF
11600 symbol reference. */
11601 if (gotoff_operand (op0, Pmode)
11602 && CONST_INT_P (op1))
11606 if (reload_in_progress)
11607 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11608 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11610 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11611 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11612 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11616 emit_move_insn (reg, new_rtx);
11622 if (INTVAL (op1) < -16*1024*1024
11623 || INTVAL (op1) >= 16*1024*1024)
11625 if (!x86_64_immediate_operand (op1, Pmode))
11626 op1 = force_reg (Pmode, op1);
11627 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11633 base = legitimize_pic_address (XEXP (addr, 0), reg);
11634 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11635 base == reg ? NULL_RTX : reg);
11637 if (CONST_INT_P (new_rtx))
11638 new_rtx = plus_constant (base, INTVAL (new_rtx));
11641 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11643 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11644 new_rtx = XEXP (new_rtx, 1);
11646 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11654 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11657 get_thread_pointer (int to_reg)
11661 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11662 if (!to_reg)
11663 return tp;
11665 reg = gen_reg_rtx (Pmode);
11666 insn = gen_rtx_SET (VOIDmode, reg, tp);
11667 insn = emit_insn (insn);
11672 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11673 false if we expect this to be used for a memory address and true if
11674 we expect to load the address into a register. */
11677 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11679 rtx dest, base, off, pic, tp;
11684 case TLS_MODEL_GLOBAL_DYNAMIC:
11685 dest = gen_reg_rtx (Pmode);
11686 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11688 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11690 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11693 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11694 insns = get_insns ();
11697 RTL_CONST_CALL_P (insns) = 1;
11698 emit_libcall_block (insns, dest, rax, x);
11700 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11701 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11703 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11705 if (TARGET_GNU2_TLS)
11707 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11709 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11713 case TLS_MODEL_LOCAL_DYNAMIC:
11714 base = gen_reg_rtx (Pmode);
11715 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11717 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11719 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11722 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11723 insns = get_insns ();
11726 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11727 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11728 RTL_CONST_CALL_P (insns) = 1;
11729 emit_libcall_block (insns, base, rax, note);
11731 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11732 emit_insn (gen_tls_local_dynamic_base_64 (base));
11734 emit_insn (gen_tls_local_dynamic_base_32 (base));
11736 if (TARGET_GNU2_TLS)
11738 rtx x = ix86_tls_module_base ();
11740 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11741 gen_rtx_MINUS (Pmode, x, tp));
11744 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11745 off = gen_rtx_CONST (Pmode, off);
11747 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11749 if (TARGET_GNU2_TLS)
11751 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11753 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11758 case TLS_MODEL_INITIAL_EXEC:
11762 type = UNSPEC_GOTNTPOFF;
11766 if (reload_in_progress)
11767 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11768 pic = pic_offset_table_rtx;
11769 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11771 else if (!TARGET_ANY_GNU_TLS)
11773 pic = gen_reg_rtx (Pmode);
11774 emit_insn (gen_set_got (pic));
11775 type = UNSPEC_GOTTPOFF;
11780 type = UNSPEC_INDNTPOFF;
11783 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11784 off = gen_rtx_CONST (Pmode, off);
11786 off = gen_rtx_PLUS (Pmode, pic, off);
11787 off = gen_const_mem (Pmode, off);
11788 set_mem_alias_set (off, ix86_GOT_alias_set ());
11790 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11792 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11793 off = force_reg (Pmode, off);
11794 return gen_rtx_PLUS (Pmode, base, off);
11798 base = get_thread_pointer (true);
11799 dest = gen_reg_rtx (Pmode);
11800 emit_insn (gen_subsi3 (dest, base, off));
11804 case TLS_MODEL_LOCAL_EXEC:
11805 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11806 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11807 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11808 off = gen_rtx_CONST (Pmode, off);
11810 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11812 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11813 return gen_rtx_PLUS (Pmode, base, off);
11817 base = get_thread_pointer (true);
11818 dest = gen_reg_rtx (Pmode);
11819 emit_insn (gen_subsi3 (dest, base, off));
11824 gcc_unreachable ();
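/* Illustrative 64-bit GNU TLS sequences corresponding to the cases
   above (exact code depends on the TARGET_* checks; a sketch only):

     initial-exec:	movq	x@gottpoff(%rip), %rax
			movq	%fs:(%rax), %rdx
     local-exec:	movq	%fs:x@tpoff, %rax

   where %fs points at the thread control block (UNSPEC_TP).  */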
11830 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11831 to symbol DECL. */
11833 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11834 htab_t dllimport_map;
11837 get_dllimport_decl (tree decl)
11839 struct tree_map *h, in;
11842 const char *prefix;
11843 size_t namelen, prefixlen;
11848 if (!dllimport_map)
11849 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11851 in.hash = htab_hash_pointer (decl);
11852 in.base.from = decl;
11853 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11854 h = (struct tree_map *) *loc;
11858 *loc = h = ggc_alloc_tree_map ();
11860 h->base.from = decl;
11861 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11862 VAR_DECL, NULL, ptr_type_node);
11863 DECL_ARTIFICIAL (to) = 1;
11864 DECL_IGNORED_P (to) = 1;
11865 DECL_EXTERNAL (to) = 1;
11866 TREE_READONLY (to) = 1;
11868 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11869 name = targetm.strip_name_encoding (name);
11870 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11871 ? "*__imp_" : "*__imp__";
11872 namelen = strlen (name);
11873 prefixlen = strlen (prefix);
11874 imp_name = (char *) alloca (namelen + prefixlen + 1);
11875 memcpy (imp_name, prefix, prefixlen);
11876 memcpy (imp_name + prefixlen, name, namelen + 1);
11878 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11879 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11880 SET_SYMBOL_REF_DECL (rtl, to);
11881 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11883 rtl = gen_const_mem (Pmode, rtl);
11884 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11886 SET_DECL_RTL (to, rtl);
11887 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
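/* For example, with the default user_label_prefix a 32-bit reference to
   a dllimported variable foo becomes a load through the import slot
   built above (an illustrative sketch):

	movl	__imp__foo, %eax	# address of foo
	movl	(%eax), %eax		# value of foo

   The __imp__foo SYMBOL_REF is the "*__imp__" prefix plus the stripped
   name constructed above.  */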
11892 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11893 true if we require the result be a register. */
11896 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11901 gcc_assert (SYMBOL_REF_DECL (symbol));
11902 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11904 x = DECL_RTL (imp_decl);
11906 x = force_reg (Pmode, x);
11910 /* Try machine-dependent ways of modifying an illegitimate address
11911 to be legitimate. If we find one, return the new, valid address.
11912 This macro is used in only one place: `memory_address' in explow.c.
11914 OLDX is the address as it was before break_out_memory_refs was called.
11915 In some cases it is useful to look at this to decide what needs to be done.
11917 It is always safe for this macro to do nothing. It exists to recognize
11918 opportunities to optimize the output.
11920 For the 80386, we handle X+REG by loading X into a register R and
11921 using R+REG. R will go in a general reg and indexing will be used.
11922 However, if REG is a broken-out memory address or multiplication,
11923 nothing needs to be done because REG can certainly go in a general reg.
11925 When -fpic is used, special handling is needed for symbolic references.
11926 See comments by legitimize_pic_address in i386.c for details. */
11929 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11930 enum machine_mode mode)
11935 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11937 return legitimize_tls_address (x, (enum tls_model) log, false);
11938 if (GET_CODE (x) == CONST
11939 && GET_CODE (XEXP (x, 0)) == PLUS
11940 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11941 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11943 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11944 (enum tls_model) log, false);
11945 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11948 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11950 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11951 return legitimize_dllimport_symbol (x, true);
11952 if (GET_CODE (x) == CONST
11953 && GET_CODE (XEXP (x, 0)) == PLUS
11954 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11955 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11957 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11958 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11962 if (flag_pic && SYMBOLIC_CONST (x))
11963 return legitimize_pic_address (x, 0);
11965 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11966 if (GET_CODE (x) == ASHIFT
11967 && CONST_INT_P (XEXP (x, 1))
11968 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11971 log = INTVAL (XEXP (x, 1));
11972 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11973 GEN_INT (1 << log));
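/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4));
   MULT is the canonical form ix86_decompose_address expects for a
   scaled index.  */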
11976 if (GET_CODE (x) == PLUS)
11978 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11980 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11981 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11982 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11985 log = INTVAL (XEXP (XEXP (x, 0), 1));
11986 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11987 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11988 GEN_INT (1 << log));
11991 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11992 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11993 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11996 log = INTVAL (XEXP (XEXP (x, 1), 1));
11997 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11998 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11999 GEN_INT (1 << log));
12002 /* Put multiply first if it isn't already. */
12003 if (GET_CODE (XEXP (x, 1)) == MULT)
12005 rtx tmp = XEXP (x, 0);
12006 XEXP (x, 0) = XEXP (x, 1);
12011 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
12012 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
12013 created by virtual register instantiation, register elimination, and
12014 similar optimizations. */
12015 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
12018 x = gen_rtx_PLUS (Pmode,
12019 gen_rtx_PLUS (Pmode, XEXP (x, 0),
12020 XEXP (XEXP (x, 1), 0)),
12021 XEXP (XEXP (x, 1), 1));
12025 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
12026 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
12027 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
12028 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
12029 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
12030 && CONSTANT_P (XEXP (x, 1)))
12033 rtx other = NULL_RTX;
12035 if (CONST_INT_P (XEXP (x, 1)))
12037 constant = XEXP (x, 1);
12038 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
12040 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
12042 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
12043 other = XEXP (x, 1);
12051 x = gen_rtx_PLUS (Pmode,
12052 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
12053 XEXP (XEXP (XEXP (x, 0), 1), 0)),
12054 plus_constant (other, INTVAL (constant)));
12058 if (changed && ix86_legitimate_address_p (mode, x, false))
12061 if (GET_CODE (XEXP (x, 0)) == MULT)
12064 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
12067 if (GET_CODE (XEXP (x, 1)) == MULT)
12070 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
12074 && REG_P (XEXP (x, 1))
12075 && REG_P (XEXP (x, 0)))
12078 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
12081 x = legitimize_pic_address (x, 0);
12084 if (changed && ix86_legitimate_address_p (mode, x, false))
12087 if (REG_P (XEXP (x, 0)))
12089 rtx temp = gen_reg_rtx (Pmode);
12090 rtx val = force_operand (XEXP (x, 1), temp);
12092 emit_move_insn (temp, val);
12094 XEXP (x, 1) = temp;
12098 else if (REG_P (XEXP (x, 1)))
12100 rtx temp = gen_reg_rtx (Pmode);
12101 rtx val = force_operand (XEXP (x, 0), temp);
12103 emit_move_insn (temp, val);
12105 XEXP (x, 0) = temp;
12113 /* Print an integer constant expression in assembler syntax. Addition
12114 and subtraction are the only arithmetic that may appear in these
12115 expressions. FILE is the stdio stream to write to, X is the rtx, and
12116 CODE is the operand print code from the output string. */
12119 output_pic_addr_const (FILE *file, rtx x, int code)
12123 switch (GET_CODE (x))
12126 gcc_assert (flag_pic);
12131 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
12132 output_addr_const (file, x);
12135 const char *name = XSTR (x, 0);
12137 /* Mark the decl as referenced so that cgraph will
12138 output the function. */
12139 if (SYMBOL_REF_DECL (x))
12140 mark_decl_referenced (SYMBOL_REF_DECL (x));
12143 if (MACHOPIC_INDIRECT
12144 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
12145 name = machopic_indirection_name (x, /*stub_p=*/true);
12147 assemble_name (file, name);
12149 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
12150 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
12151 fputs ("@PLT", file);
12158 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
12159 assemble_name (asm_out_file, buf);
12163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12167 /* This used to output parentheses around the expression,
12168 but that does not work on the 386 (either ATT or BSD assembler). */
12169 output_pic_addr_const (file, XEXP (x, 0), code);
12173 if (GET_MODE (x) == VOIDmode)
12175 /* We can use %d if the number is <32 bits and positive. */
12176 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
12177 fprintf (file, "0x%lx%08lx",
12178 (unsigned long) CONST_DOUBLE_HIGH (x),
12179 (unsigned long) CONST_DOUBLE_LOW (x));
12181 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
12184 /* We can't handle floating point constants;
12185 TARGET_PRINT_OPERAND must handle them. */
12186 output_operand_lossage ("floating constant misused");
12190 /* Some assemblers need integer constants to appear first. */
12191 if (CONST_INT_P (XEXP (x, 0)))
12193 output_pic_addr_const (file, XEXP (x, 0), code);
12195 output_pic_addr_const (file, XEXP (x, 1), code);
12199 gcc_assert (CONST_INT_P (XEXP (x, 1)));
12200 output_pic_addr_const (file, XEXP (x, 1), code);
12202 output_pic_addr_const (file, XEXP (x, 0), code);
12208 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
12209 output_pic_addr_const (file, XEXP (x, 0), code);
12211 output_pic_addr_const (file, XEXP (x, 1), code);
12213 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
12217 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
12219 bool f = i386_asm_output_addr_const_extra (file, x);
12224 gcc_assert (XVECLEN (x, 0) == 1);
12225 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
12226 switch (XINT (x, 1))
12229 fputs ("@GOT", file);
12231 case UNSPEC_GOTOFF:
12232 fputs ("@GOTOFF", file);
12234 case UNSPEC_PLTOFF:
12235 fputs ("@PLTOFF", file);
12237 case UNSPEC_GOTPCREL:
12238 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12239 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
12241 case UNSPEC_GOTTPOFF:
12242 /* FIXME: This might be @TPOFF in Sun ld too. */
12243 fputs ("@gottpoff", file);
12246 fputs ("@tpoff", file);
12248 case UNSPEC_NTPOFF:
12250 fputs ("@tpoff", file);
12252 fputs ("@ntpoff", file);
12254 case UNSPEC_DTPOFF:
12255 fputs ("@dtpoff", file);
12257 case UNSPEC_GOTNTPOFF:
12259 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12260 "@gottpoff(%rip)": "@gottpoff[rip]", file);
12262 fputs ("@gotntpoff", file);
12264 case UNSPEC_INDNTPOFF:
12265 fputs ("@indntpoff", file);
12268 case UNSPEC_MACHOPIC_OFFSET:
12270 machopic_output_function_base_name (file);
12274 output_operand_lossage ("invalid UNSPEC as operand");
12280 output_operand_lossage ("invalid expression as operand");
12284 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12285 We need to emit DTP-relative relocations. */
12287 static void ATTRIBUTE_UNUSED
12288 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
12290 fputs (ASM_LONG, file);
12291 output_addr_const (file, x);
12292 fputs ("@dtpoff", file);
12298 fputs (", 0", file);
12301 gcc_unreachable ();
12305 /* Return true if X is a representation of the PIC register. This copes
12306 with calls from ix86_find_base_term, where the register might have
12307 been replaced by a cselib value. */
12310 ix86_pic_register_p (rtx x)
12312 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
12313 return (pic_offset_table_rtx
12314 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
12316 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
12319 /* Helper function for ix86_delegitimize_address.
12320 Attempt to delegitimize TLS local-exec accesses. */
12323 ix86_delegitimize_tls_address (rtx orig_x)
12325 rtx x = orig_x, unspec;
12326 struct ix86_address addr;
12328 if (!TARGET_TLS_DIRECT_SEG_REFS)
12332 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
12334 if (ix86_decompose_address (x, &addr) == 0
12335 || addr.seg != (TARGET_64BIT ? SEG_FS : SEG_GS)
12336 || addr.disp == NULL_RTX
12337 || GET_CODE (addr.disp) != CONST)
12339 unspec = XEXP (addr.disp, 0);
12340 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
12341 unspec = XEXP (unspec, 0);
12342 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
12344 x = XVECEXP (unspec, 0, 0);
12345 gcc_assert (GET_CODE (x) == SYMBOL_REF);
12346 if (unspec != XEXP (addr.disp, 0))
12347 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
12350 rtx idx = addr.index;
12351 if (addr.scale != 1)
12352 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
12353 x = gen_rtx_PLUS (Pmode, idx, x);
12356 x = gen_rtx_PLUS (Pmode, addr.base, x);
12357 if (MEM_P (orig_x))
12358 x = replace_equiv_address_nv (orig_x, x);
12362 /* In the name of slightly smaller debug output, and to cater to
12363 general assembler lossage, recognize PIC+GOTOFF and turn it back
12364 into a direct symbol reference.
12366 On Darwin, this is necessary to avoid a crash, because Darwin
12367 has a different PIC label for each routine but the DWARF debugging
12368 information is not associated with any particular routine, so it's
12369 necessary to remove references to the PIC label from RTL stored by
12370 the DWARF output code. */
12373 ix86_delegitimize_address (rtx x)
12375 rtx orig_x = delegitimize_mem_from_attrs (x);
12376 /* addend is NULL or some rtx if x is something+GOTOFF where
12377 something doesn't include the PIC register. */
12378 rtx addend = NULL_RTX;
12379 /* reg_addend is NULL or a multiple of some register. */
12380 rtx reg_addend = NULL_RTX;
12381 /* const_addend is NULL or a const_int. */
12382 rtx const_addend = NULL_RTX;
12383 /* This is the result, or NULL. */
12384 rtx result = NULL_RTX;
12393 if (GET_CODE (x) != CONST
12394 || GET_CODE (XEXP (x, 0)) != UNSPEC
12395 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
12396 || !MEM_P (orig_x))
12397 return ix86_delegitimize_tls_address (orig_x);
12398 x = XVECEXP (XEXP (x, 0), 0, 0);
12399 if (GET_MODE (orig_x) != Pmode)
12400 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
12404 if (GET_CODE (x) != PLUS
12405 || GET_CODE (XEXP (x, 1)) != CONST)
12406 return ix86_delegitimize_tls_address (orig_x);
12408 if (ix86_pic_register_p (XEXP (x, 0)))
12409 /* %ebx + GOT/GOTOFF */
12411 else if (GET_CODE (XEXP (x, 0)) == PLUS)
12413 /* %ebx + %reg * scale + GOT/GOTOFF */
12414 reg_addend = XEXP (x, 0);
12415 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
12416 reg_addend = XEXP (reg_addend, 1);
12417 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
12418 reg_addend = XEXP (reg_addend, 0);
12421 reg_addend = NULL_RTX;
12422 addend = XEXP (x, 0);
12426 addend = XEXP (x, 0);
12428 x = XEXP (XEXP (x, 1), 0);
12429 if (GET_CODE (x) == PLUS
12430 && CONST_INT_P (XEXP (x, 1)))
12432 const_addend = XEXP (x, 1);
12436 if (GET_CODE (x) == UNSPEC
12437 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12438 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12439 result = XVECEXP (x, 0, 0);
12441 if (TARGET_MACHO && darwin_local_data_pic (x)
12442 && !MEM_P (orig_x))
12443 result = XVECEXP (x, 0, 0);
12446 return ix86_delegitimize_tls_address (orig_x);
12449 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12451 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12454 /* If the rest of original X doesn't involve the PIC register, add
12455 addend and subtract pic_offset_table_rtx. This can happen e.g.
12457 leal (%ebx, %ecx, 4), %ecx
12459 movl foo@GOTOFF(%ecx), %edx
12460 in which case we return (%ecx - %ebx) + foo. */
12461 if (pic_offset_table_rtx)
12462 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12463 pic_offset_table_rtx),
12468 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12469 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12473 /* If X is a machine specific address (i.e. a symbol or label being
12474 referenced as a displacement from the GOT implemented using an
12475 UNSPEC), then return the base term. Otherwise return X. */
12478 ix86_find_base_term (rtx x)
12484 if (GET_CODE (x) != CONST)
12486 term = XEXP (x, 0);
12487 if (GET_CODE (term) == PLUS
12488 && (CONST_INT_P (XEXP (term, 1))
12489 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12490 term = XEXP (term, 0);
12491 if (GET_CODE (term) != UNSPEC
12492 || XINT (term, 1) != UNSPEC_GOTPCREL)
12495 return XVECEXP (term, 0, 0);
12498 return ix86_delegitimize_address (x);
12502 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12503 int fp, FILE *file)
12505 const char *suffix;
12507 if (mode == CCFPmode || mode == CCFPUmode)
12509 code = ix86_fp_compare_code_to_integer (code);
12513 code = reverse_condition (code);
12564 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12568 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12569 Those same assemblers have the same but opposite lossage on cmov. */
12570 if (mode == CCmode)
12571 suffix = fp ? "nbe" : "a";
12572 else if (mode == CCCmode)
12575 gcc_unreachable ();
12591 gcc_unreachable ();
12595 gcc_assert (mode == CCmode || mode == CCCmode);
12612 gcc_unreachable ();
12616 /* ??? As above. */
12617 gcc_assert (mode == CCmode || mode == CCCmode);
12618 suffix = fp ? "nb" : "ae";
12621 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12625 /* ??? As above. */
12626 if (mode == CCmode)
12628 else if (mode == CCCmode)
12629 suffix = fp ? "nb" : "ae";
12631 gcc_unreachable ();
12634 suffix = fp ? "u" : "p";
12637 suffix = fp ? "nu" : "np";
12640 gcc_unreachable ();
12642 fputs (suffix, file);
12645 /* Print the name of register X to FILE based on its machine mode and number.
12646 If CODE is 'w', pretend the mode is HImode.
12647 If CODE is 'b', pretend the mode is QImode.
12648 If CODE is 'k', pretend the mode is SImode.
12649 If CODE is 'q', pretend the mode is DImode.
12650 If CODE is 'x', pretend the mode is V4SFmode.
12651 If CODE is 't', pretend the mode is V8SFmode.
12652 If CODE is 'h', pretend the reg is the 'high' byte register.
12653 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12654 If CODE is 'd', duplicate the operand for AVX instruction.
12658 print_reg (rtx x, int code, FILE *file)
12661 bool duplicated = code == 'd' && TARGET_AVX;
12663 gcc_assert (x == pc_rtx
12664 || (REGNO (x) != ARG_POINTER_REGNUM
12665 && REGNO (x) != FRAME_POINTER_REGNUM
12666 && REGNO (x) != FLAGS_REG
12667 && REGNO (x) != FPSR_REG
12668 && REGNO (x) != FPCR_REG));
12670 if (ASSEMBLER_DIALECT == ASM_ATT)
12675 gcc_assert (TARGET_64BIT);
12676 fputs ("rip", file);
12680 if (code == 'w' || MMX_REG_P (x))
12682 else if (code == 'b')
12684 else if (code == 'k')
12686 else if (code == 'q')
12688 else if (code == 'y')
12690 else if (code == 'h')
12692 else if (code == 'x')
12694 else if (code == 't')
12697 code = GET_MODE_SIZE (GET_MODE (x));
12699 /* Irritatingly, AMD extended registers use a different naming
12700 convention from the normal registers. */
12701 if (REX_INT_REG_P (x))
12703 gcc_assert (TARGET_64BIT);
12707 error ("extended registers have no high halves");
12710 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12713 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12716 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12719 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12722 error ("unsupported operand size for extended register");
12732 if (STACK_TOP_P (x))
12741 if (! ANY_FP_REG_P (x))
12742 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12747 reg = hi_reg_name[REGNO (x)];
12750 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12752 reg = qi_reg_name[REGNO (x)];
12755 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12757 reg = qi_high_reg_name[REGNO (x)];
12762 gcc_assert (!duplicated);
12764 fputs (hi_reg_name[REGNO (x)] + 1, file);
12769 gcc_unreachable ();
12775 if (ASSEMBLER_DIALECT == ASM_ATT)
12776 fprintf (file, ", %%%s", reg);
12778 fprintf (file, ", %s", reg);
12782 /* Locate some local-dynamic symbol still in use by this function
12783 so that we can print its name in some tls_local_dynamic_base pattern. */
12787 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12791 if (GET_CODE (x) == SYMBOL_REF
12792 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12794 cfun->machine->some_ld_name = XSTR (x, 0);
12801 static const char *
12802 get_some_local_dynamic_name (void)
12806 if (cfun->machine->some_ld_name)
12807 return cfun->machine->some_ld_name;
12809 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12810 if (NONDEBUG_INSN_P (insn)
12811 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12812 return cfun->machine->some_ld_name;
12817 /* Meaning of CODE:
12818 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12819 C -- print opcode suffix for set/cmov insn.
12820 c -- like C, but print reversed condition
12821 F,f -- likewise, but for floating-point.
12822 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12824 R -- print the prefix for register names.
12825 z -- print the opcode suffix for the size of the current operand.
12826 Z -- likewise, with special suffixes for x87 instructions.
12827 * -- print a star (in certain assembler syntax)
12828 A -- print an absolute memory reference.
12829 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12830 s -- print a shift double count, followed by the assembler's argument
12832 b -- print the QImode name of the register for the indicated operand.
12833 %b0 would print %al if operands[0] is reg 0.
12834 w -- likewise, print the HImode name of the register.
12835 k -- likewise, print the SImode name of the register.
12836 q -- likewise, print the DImode name of the register.
12837 x -- likewise, print the V4SFmode name of the register.
12838 t -- likewise, print the V8SFmode name of the register.
12839 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12840 y -- print "st(0)" instead of "st" as a register.
12841 d -- print duplicated register operand for AVX instruction.
12842 D -- print condition for SSE cmp instruction.
12843 P -- if PIC, print an @PLT suffix.
12844 X -- don't print any sort of PIC '@' suffix for a symbol.
12845 & -- print some in-use local-dynamic symbol name.
12846 H -- print a memory address offset by 8; used for sse high-parts
12847 Y -- print condition for XOP pcom* instruction.
12848 + -- print a branch hint as 'cs' or 'ds' prefix
12849 ; -- print a semicolon (after prefixes due to bug in older gas).
12850 @ -- print a segment register of thread base pointer load
12854 ix86_print_operand (FILE *file, rtx x, int code)
12861 if (ASSEMBLER_DIALECT == ASM_ATT)
12867 const char *name = get_some_local_dynamic_name ();
12869 output_operand_lossage ("'%%&' used without any "
12870 "local dynamic TLS references");
12872 assemble_name (file, name);
12877 switch (ASSEMBLER_DIALECT)
12884 /* Intel syntax. For absolute addresses, registers should not
12885 be surrounded by braces. */
12889 ix86_print_operand (file, x, 0);
12896 gcc_unreachable ();
12899 ix86_print_operand (file, x, 0);
12904 if (ASSEMBLER_DIALECT == ASM_ATT)
12909 if (ASSEMBLER_DIALECT == ASM_ATT)
12914 if (ASSEMBLER_DIALECT == ASM_ATT)
12919 if (ASSEMBLER_DIALECT == ASM_ATT)
12924 if (ASSEMBLER_DIALECT == ASM_ATT)
12929 if (ASSEMBLER_DIALECT == ASM_ATT)
12934 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12936 /* Opcodes don't get size suffixes if using Intel opcodes. */
12937 if (ASSEMBLER_DIALECT == ASM_INTEL)
12940 switch (GET_MODE_SIZE (GET_MODE (x)))
12959 output_operand_lossage
12960 ("invalid operand size for operand code '%c'", code);
12965 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12967 (0, "non-integer operand used with operand code '%c'", code);
12971 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12972 if (ASSEMBLER_DIALECT == ASM_INTEL)
12975 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12977 switch (GET_MODE_SIZE (GET_MODE (x)))
12980 #ifdef HAVE_AS_IX86_FILDS
12990 #ifdef HAVE_AS_IX86_FILDQ
12993 fputs ("ll", file);
13001 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
13003 /* 387 opcodes don't get size suffixes
13004 if the operands are registers. */
13005 if (STACK_REG_P (x))
13008 switch (GET_MODE_SIZE (GET_MODE (x)))
13029 output_operand_lossage
13030 ("invalid operand type used with operand code '%c'", code);
13034 output_operand_lossage
13035 ("invalid operand size for operand code '%c'", code);
13052 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
13054 ix86_print_operand (file, x, 0);
13055 fputs (", ", file);
13060 /* Little bit of braindamage here. The SSE compare instructions
13061 use completely different names for the comparisons than the
13062 fp conditional moves do. */
13065 switch (GET_CODE (x))
13068 fputs ("eq", file);
13071 fputs ("eq_us", file);
13074 fputs ("lt", file);
13077 fputs ("nge", file);
13080 fputs ("le", file);
13083 fputs ("ngt", file);
13086 fputs ("unord", file);
13089 fputs ("neq", file);
13092 fputs ("neq_oq", file);
13095 fputs ("ge", file);
13098 fputs ("nlt", file);
13101 fputs ("gt", file);
13104 fputs ("nle", file);
13107 fputs ("ord", file);
13110 output_operand_lossage ("operand is not a condition code, "
13111 "invalid operand code 'D'");
13117 switch (GET_CODE (x))
13121 fputs ("eq", file);
13125 fputs ("lt", file);
13129 fputs ("le", file);
13132 fputs ("unord", file);
13136 fputs ("neq", file);
13140 fputs ("nlt", file);
13144 fputs ("nle", file);
13147 fputs ("ord", file);
13150 output_operand_lossage ("operand is not a condition code, "
13151 "invalid operand code 'D'");
13157 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13158 if (ASSEMBLER_DIALECT == ASM_ATT)
13160 switch (GET_MODE (x))
13162 case HImode: putc ('w', file); break;
13164 case SFmode: putc ('l', file); break;
13166 case DFmode: putc ('q', file); break;
13167 default: gcc_unreachable ();
13174 if (!COMPARISON_P (x))
13176 output_operand_lossage ("operand is neither a constant nor a "
13177 "condition code, invalid operand code "
13181 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
13184 if (!COMPARISON_P (x))
13186 output_operand_lossage ("operand is neither a constant nor a "
13187 "condition code, invalid operand code "
13191 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13192 if (ASSEMBLER_DIALECT == ASM_ATT)
13195 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
13198 /* Like above, but reverse condition */
13200 /* Check to see if argument to %c is really a constant
13201 and not a condition code which needs to be reversed. */
13202 if (!COMPARISON_P (x))
13204 output_operand_lossage ("operand is neither a constant nor a "
13205 "condition code, invalid operand "
13209 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
13212 if (!COMPARISON_P (x))
13214 output_operand_lossage ("operand is neither a constant nor a "
13215 "condition code, invalid operand "
13219 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
13220 if (ASSEMBLER_DIALECT == ASM_ATT)
13223 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
13227 /* It doesn't actually matter what mode we use here, as we're
13228 only going to use this for printing. */
13229 x = adjust_address_nv (x, DImode, 8);
13237 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
13240 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
13243 int pred_val = INTVAL (XEXP (x, 0));
13245 if (pred_val < REG_BR_PROB_BASE * 45 / 100
13246 || pred_val > REG_BR_PROB_BASE * 55 / 100)
13248 int taken = pred_val > REG_BR_PROB_BASE / 2;
13249 int cputaken = final_forward_branch_p (current_output_insn) == 0;
13251 /* Emit hints only where the default branch prediction
13252 heuristics would fail. */
13253 if (taken != cputaken)
13255 /* We use 3e (DS) prefix for taken branches and
13256 2e (CS) prefix for not taken branches. */
13258 fputs ("ds ; ", file);
13260 fputs ("cs ; ", file);
13268 switch (GET_CODE (x))
13271 fputs ("neq", file);
13274 fputs ("eq", file);
13278 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
13282 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
13286 fputs ("le", file);
13290 fputs ("lt", file);
13293 fputs ("unord", file);
13296 fputs ("ord", file);
13299 fputs ("ueq", file);
13302 fputs ("nlt", file);
13305 fputs ("nle", file);
13308 fputs ("ule", file);
13311 fputs ("ult", file);
13314 fputs ("une", file);
13317 output_operand_lossage ("operand is not a condition code, "
13318 "invalid operand code 'Y'");
13324 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
13330 if (ASSEMBLER_DIALECT == ASM_ATT)
13333 /* The kernel uses a different segment register for performance
13334 reasons; a system call would not have to trash the userspace
13335 segment register, which would be expensive. */
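/* Concretely (illustrative only, not taken from a pattern in this
   file): a 64-bit userland load of the thread pointer comes out as
   "movq %fs:0, %rax", while 32-bit code and the 64-bit kernel use
   %gs instead.  */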
13336 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
13337 fputs ("fs", file);
13339 fputs ("gs", file);
13343 output_operand_lossage ("invalid operand code '%c'", code);
13348 print_reg (x, code, file);
13350 else if (MEM_P (x))
13352 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
13353 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
13354 && GET_MODE (x) != BLKmode)
13357 switch (GET_MODE_SIZE (GET_MODE (x)))
13359 case 1: size = "BYTE"; break;
13360 case 2: size = "WORD"; break;
13361 case 4: size = "DWORD"; break;
13362 case 8: size = "QWORD"; break;
13363 case 12: size = "TBYTE"; break;
13365 if (GET_MODE (x) == XFmode)
13370 case 32: size = "YMMWORD"; break;
13372 gcc_unreachable ();
13375 /* Check for explicit size override (codes 'b', 'w' and 'k') */
13378 else if (code == 'w')
13380 else if (code == 'k')
13383 fputs (size, file);
13384 fputs (" PTR ", file);
13388 /* Avoid (%rip) for call operands. */
13389 if (CONSTANT_ADDRESS_P (x) && code == 'P'
13390 && !CONST_INT_P (x))
13391 output_addr_const (file, x);
13392 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
13393 output_operand_lossage ("invalid constraints for operand");
13395 output_address (x);
13398 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
13403 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
13404 REAL_VALUE_TO_TARGET_SINGLE (r, l);
13406 if (ASSEMBLER_DIALECT == ASM_ATT)
13408 /* Sign extend 32bit SFmode immediate to 8 bytes. */
13410 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
13412 fprintf (file, "0x%08x", (unsigned int) l);
13415 /* These float cases don't actually occur as immediate operands. */
13416 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
13420 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13421 fputs (dstr, file);
13424 else if (GET_CODE (x) == CONST_DOUBLE
13425 && GET_MODE (x) == XFmode)
13429 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
13430 fputs (dstr, file);
13435 /* We have patterns that allow zero sets of memory, for instance.
13436 In 64-bit mode, we should probably support all 8-byte vectors,
13437 since we can in fact encode that into an immediate. */
13438 if (GET_CODE (x) == CONST_VECTOR)
13440 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13446 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13448 if (ASSEMBLER_DIALECT == ASM_ATT)
13451 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13452 || GET_CODE (x) == LABEL_REF)
13454 if (ASSEMBLER_DIALECT == ASM_ATT)
13457 fputs ("OFFSET FLAT:", file);
13460 if (CONST_INT_P (x))
13461 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13463 output_pic_addr_const (file, x, code);
13465 output_addr_const (file, x);
13470 ix86_print_operand_punct_valid_p (unsigned char code)
13472 return (code == '@' || code == '*' || code == '+'
13473 || code == '&' || code == ';');
13476 /* Print a memory operand whose address is ADDR. */
13479 ix86_print_operand_address (FILE *file, rtx addr)
13481 struct ix86_address parts;
13482 rtx base, index, disp;
13484 int ok = ix86_decompose_address (addr, &parts);
13489 index = parts.index;
13491 scale = parts.scale;
13499 if (ASSEMBLER_DIALECT == ASM_ATT)
13501 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13504 gcc_unreachable ();
13507 /* Use one byte shorter RIP relative addressing for 64bit mode. */
13508 if (TARGET_64BIT && !base && !index)
13512 if (GET_CODE (disp) == CONST
13513 && GET_CODE (XEXP (disp, 0)) == PLUS
13514 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13515 symbol = XEXP (XEXP (disp, 0), 0);
13517 if (GET_CODE (symbol) == LABEL_REF
13518 || (GET_CODE (symbol) == SYMBOL_REF
13519 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13522 if (!base && !index)
13524 /* Displacement only requires special attention. */
13526 if (CONST_INT_P (disp))
13528 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13529 fputs ("ds:", file);
13530 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13533 output_pic_addr_const (file, disp, 0);
13535 output_addr_const (file, disp);
13539 if (ASSEMBLER_DIALECT == ASM_ATT)
13544 output_pic_addr_const (file, disp, 0);
13545 else if (GET_CODE (disp) == LABEL_REF)
13546 output_asm_label (disp);
13548 output_addr_const (file, disp);
13553 print_reg (base, 0, file);
13557 print_reg (index, 0, file);
13559 fprintf (file, ",%d", scale);
13565 rtx offset = NULL_RTX;
13569 /* Pull out the offset of a symbol; print any symbol itself. */
13570 if (GET_CODE (disp) == CONST
13571 && GET_CODE (XEXP (disp, 0)) == PLUS
13572 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13574 offset = XEXP (XEXP (disp, 0), 1);
13575 disp = gen_rtx_CONST (VOIDmode,
13576 XEXP (XEXP (disp, 0), 0));
13580 output_pic_addr_const (file, disp, 0);
13581 else if (GET_CODE (disp) == LABEL_REF)
13582 output_asm_label (disp);
13583 else if (CONST_INT_P (disp))
13586 output_addr_const (file, disp);
13592 print_reg (base, 0, file);
13595 if (INTVAL (offset) >= 0)
13597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13608 print_reg (index, 0, file);
13610 fprintf (file, "*%d", scale);
13617 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13620 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13624 if (GET_CODE (x) != UNSPEC)
13627 op = XVECEXP (x, 0, 0);
13628 switch (XINT (x, 1))
13630 case UNSPEC_GOTTPOFF:
13631 output_addr_const (file, op);
13632 /* FIXME: This might be @TPOFF in Sun ld. */
13633 fputs ("@gottpoff", file);
13636 output_addr_const (file, op);
13637 fputs ("@tpoff", file);
13639 case UNSPEC_NTPOFF:
13640 output_addr_const (file, op);
13642 fputs ("@tpoff", file);
13644 fputs ("@ntpoff", file);
13646 case UNSPEC_DTPOFF:
13647 output_addr_const (file, op);
13648 fputs ("@dtpoff", file);
13650 case UNSPEC_GOTNTPOFF:
13651 output_addr_const (file, op);
13653 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13654 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13656 fputs ("@gotntpoff", file);
13658 case UNSPEC_INDNTPOFF:
13659 output_addr_const (file, op);
13660 fputs ("@indntpoff", file);
13663 case UNSPEC_MACHOPIC_OFFSET:
13664 output_addr_const (file, op);
13666 machopic_output_function_base_name (file);
13670 case UNSPEC_STACK_CHECK:
13674 gcc_assert (flag_split_stack);
13676 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
13677 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
13679 gcc_unreachable ();
13682 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
13693 /* Split one or more double-mode RTL references into pairs of half-mode
13694 references. The RTL can be REG, offsettable MEM, integer constant, or
13695 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
13696 split and "num" is its length. lo_half and hi_half are output arrays
13697 that parallel "operands". */
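/* Minimal usage sketch (hypothetical caller; a 32-bit target and two
   DImode operands are assumed):

     rtx lo[2], hi[2];
     split_double_mode (DImode, operands, 2, lo, hi);

   after which lo[i] and hi[i] hold the SImode halves of operands[i].  */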
13700 split_double_mode (enum machine_mode mode, rtx operands[],
13701 int num, rtx lo_half[], rtx hi_half[])
13703 enum machine_mode half_mode;
13709 half_mode = DImode;
13712 half_mode = SImode;
13715 gcc_unreachable ();
13718 byte = GET_MODE_SIZE (half_mode);
13722 rtx op = operands[num];
13724 /* simplify_subreg refuses to split volatile memory addresses,
13725 but we still have to handle them. */
13728 lo_half[num] = adjust_address (op, half_mode, 0);
13729 hi_half[num] = adjust_address (op, half_mode, byte);
13733 lo_half[num] = simplify_gen_subreg (half_mode, op,
13734 GET_MODE (op) == VOIDmode
13735 ? mode : GET_MODE (op), 0);
13736 hi_half[num] = simplify_gen_subreg (half_mode, op,
13737 GET_MODE (op) == VOIDmode
13738 ? mode : GET_MODE (op), byte);
13743 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13744 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13745 is the expression of the binary operation. The output may either be
13746 emitted here, or returned to the caller, like all output_* functions.
13748 There is no guarantee that the operands are the same mode, as they
13749 might be within FLOAT or FLOAT_EXTEND expressions. */
13751 #ifndef SYSV386_COMPAT
13752 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13753 wants to fix the assemblers because that causes incompatibility
13754 with gcc. No-one wants to fix gcc because that causes
13755 incompatibility with assemblers... You can use the option of
13756 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13757 #define SYSV386_COMPAT 1
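/* Hedged illustration of the incompatibility: on a SysV-derived
   (AT&T) assembler, "fsub %st, %st(1)" assembles to the operation the
   Intel manual calls FSUBR ST(1), ST (and vice versa) whenever the
   destination is not %st(0).  The templates below therefore swap the
   'r' suffix when SYSV386_COMPAT is set, so that the hardware performs
   the subtraction GCC actually asked for.  */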
13761 output_387_binary_op (rtx insn, rtx *operands)
13763 static char buf[40];
13766 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13768 #ifdef ENABLE_CHECKING
13769 /* Even if we do not want to check the inputs, this documents the
13770 input constraints, which helps in understanding the code below. */
13771 if (STACK_REG_P (operands[0])
13772 && ((REG_P (operands[1])
13773 && REGNO (operands[0]) == REGNO (operands[1])
13774 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13775 || (REG_P (operands[2])
13776 && REGNO (operands[0]) == REGNO (operands[2])
13777 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13778 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13781 gcc_assert (is_sse);
13784 switch (GET_CODE (operands[3]))
13787 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13788 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13796 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13797 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13805 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13806 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13814 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13815 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13823 gcc_unreachable ();
13830 strcpy (buf, ssep);
13831 if (GET_MODE (operands[0]) == SFmode)
13832 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13834 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13838 strcpy (buf, ssep + 1);
13839 if (GET_MODE (operands[0]) == SFmode)
13840 strcat (buf, "ss\t{%2, %0|%0, %2}");
13842 strcat (buf, "sd\t{%2, %0|%0, %2}");
13848 switch (GET_CODE (operands[3]))
13852 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13854 rtx temp = operands[2];
13855 operands[2] = operands[1];
13856 operands[1] = temp;
13859 /* We know operands[0] == operands[1]. */
13861 if (MEM_P (operands[2]))
13867 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13869 if (STACK_TOP_P (operands[0]))
13870 /* How is it that we are storing to a dead operand[2]?
13871 Well, presumably operands[1] is dead too. We can't
13872 store the result to st(0) as st(0) gets popped on this
13873 instruction. Instead store to operands[2] (which I
13874 think has to be st(1)). st(1) will be popped later.
13875 gcc <= 2.8.1 didn't have this check and generated
13876 assembly code that the Unixware assembler rejected. */
13877 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13879 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13883 if (STACK_TOP_P (operands[0]))
13884 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13886 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13891 if (MEM_P (operands[1]))
13897 if (MEM_P (operands[2]))
13903 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13906 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13907 derived assemblers, confusingly reverse the direction of
13908 the operation for fsub{r} and fdiv{r} when the
13909 destination register is not st(0). The Intel assembler
13910 doesn't have this brain damage. Read !SYSV386_COMPAT to
13911 figure out what the hardware really does. */
13912 if (STACK_TOP_P (operands[0]))
13913 p = "{p\t%0, %2|rp\t%2, %0}";
13915 p = "{rp\t%2, %0|p\t%0, %2}";
13917 if (STACK_TOP_P (operands[0]))
13918 /* As above for fmul/fadd, we can't store to st(0). */
13919 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13921 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13926 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13929 if (STACK_TOP_P (operands[0]))
13930 p = "{rp\t%0, %1|p\t%1, %0}";
13932 p = "{p\t%1, %0|rp\t%0, %1}";
13934 if (STACK_TOP_P (operands[0]))
13935 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13937 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13942 if (STACK_TOP_P (operands[0]))
13944 if (STACK_TOP_P (operands[1]))
13945 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13947 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13950 else if (STACK_TOP_P (operands[1]))
13953 p = "{\t%1, %0|r\t%0, %1}";
13955 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13961 p = "{r\t%2, %0|\t%0, %2}";
13963 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13969 gcc_unreachable ();
13976 /* Return needed mode for entity in optimize_mode_switching pass. */
13979 ix86_mode_needed (int entity, rtx insn)
13981 enum attr_i387_cw mode;
13983 /* The mode UNINITIALIZED is used to store the control word after a
13984 function call or ASM pattern. The mode ANY specifies that the
13985 function has no requirements on the control word and makes no
13986 changes in the bits we are interested in. */
13989 || (NONJUMP_INSN_P (insn)
13990 && (asm_noperands (PATTERN (insn)) >= 0
13991 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13992 return I387_CW_UNINITIALIZED;
13994 if (recog_memoized (insn) < 0)
13995 return I387_CW_ANY;
13997 mode = get_attr_i387_cw (insn);
14002 if (mode == I387_CW_TRUNC)
14007 if (mode == I387_CW_FLOOR)
14012 if (mode == I387_CW_CEIL)
14017 if (mode == I387_CW_MASK_PM)
14022 gcc_unreachable ();
14025 return I387_CW_ANY;
14028 /* Output code to initialize control word copies used by trunc?f?i and
14029 rounding patterns. CURRENT_MODE is set to the current control word,
14030 while NEW_MODE is set to the new control word. */
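/* For reference (x87 architectural facts, not derived from this file):
   the rounding-control field occupies bits 10-11 of the control word,
   so ANDing with ~0x0c00 clears it; 0x0000 then selects round to
   nearest, 0x0400 round down, 0x0800 round up and 0x0c00 truncation.
   The precision-exception mask PM is bit 5, hence the 0x0020 used for
   nearbyint().  */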
14033 emit_i387_cw_initialization (int mode)
14035 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
14038 enum ix86_stack_slot slot;
14040 rtx reg = gen_reg_rtx (HImode);
14042 emit_insn (gen_x86_fnstcw_1 (stored_mode));
14043 emit_move_insn (reg, copy_rtx (stored_mode));
14045 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
14046 || optimize_function_for_size_p (cfun))
14050 case I387_CW_TRUNC:
14051 /* round toward zero (truncate) */
14052 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
14053 slot = SLOT_CW_TRUNC;
14056 case I387_CW_FLOOR:
14057 /* round down toward -oo */
14058 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14059 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
14060 slot = SLOT_CW_FLOOR;
14064 /* round up toward +oo */
14065 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
14066 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
14067 slot = SLOT_CW_CEIL;
14070 case I387_CW_MASK_PM:
14071 /* mask precision exception for nearbyint() */
14072 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14073 slot = SLOT_CW_MASK_PM;
14077 gcc_unreachable ();
14084 case I387_CW_TRUNC:
14085 /* round toward zero (truncate) */
14086 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
14087 slot = SLOT_CW_TRUNC;
14090 case I387_CW_FLOOR:
14091 /* round down toward -oo */
14092 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
14093 slot = SLOT_CW_FLOOR;
14097 /* round up toward +oo */
14098 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
14099 slot = SLOT_CW_CEIL;
14102 case I387_CW_MASK_PM:
14103 /* mask precision exception for nearbyint() */
14104 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
14105 slot = SLOT_CW_MASK_PM;
14109 gcc_unreachable ();
14113 gcc_assert (slot < MAX_386_STACK_LOCALS);
14115 new_mode = assign_386_stack_local (HImode, slot);
14116 emit_move_insn (new_mode, reg);
14119 /* Output code for INSN to convert a float to a signed int. OPERANDS
14120 are the insn operands. The output may be [HSD]Imode and the input
14121 operand may be [SDX]Fmode. */
14124 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
14126 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14127 int dimode_p = GET_MODE (operands[0]) == DImode;
14128 int round_mode = get_attr_i387_cw (insn);
14130 /* Jump through a hoop or two for DImode, since the hardware has no
14131 non-popping instruction. We used to do this a different way, but
14132 that was somewhat fragile and broke with post-reload splitters. */
14133 if ((dimode_p || fisttp) && !stack_top_dies)
14134 output_asm_insn ("fld\t%y1", operands);
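/* fistp pops %st(0), so when the input value is still live we first
   duplicate it with the fld above and let the popping store consume
   the copy.  */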
14136 gcc_assert (STACK_TOP_P (operands[1]));
14137 gcc_assert (MEM_P (operands[0]));
14138 gcc_assert (GET_MODE (operands[1]) != TFmode);
14141 output_asm_insn ("fisttp%Z0\t%0", operands);
14144 if (round_mode != I387_CW_ANY)
14145 output_asm_insn ("fldcw\t%3", operands);
14146 if (stack_top_dies || dimode_p)
14147 output_asm_insn ("fistp%Z0\t%0", operands);
14149 output_asm_insn ("fist%Z0\t%0", operands);
14150 if (round_mode != I387_CW_ANY)
14151 output_asm_insn ("fldcw\t%2", operands);
14157 /* Output code for x87 ffreep insn. The OPNO argument, which may only
14158 have the values zero or one, indicates the ffreep insn's operand
14159 from the OPERANDS array. */
14161 static const char *
14162 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
14164 if (TARGET_USE_FFREEP)
14165 #ifdef HAVE_AS_IX86_FFREEP
14166 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
14169 static char retval[32];
14170 int regno = REGNO (operands[opno]);
14172 gcc_assert (FP_REGNO_P (regno));
14174 regno -= FIRST_STACK_REG;
14176 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
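/* ffreep %st(i) encodes as the two bytes DF C0+i.  ASM_SHORT emits the
   16-bit value little-endian, so 0xc<i>df lays down DF first and then
   C0+i, i.e. the raw opcode, for assemblers that lack the mnemonic.  */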
14181 return opno ? "fstp\t%y1" : "fstp\t%y0";
14185 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
14186 should be used. UNORDERED_P is true when fucom should be used. */
14189 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
14191 int stack_top_dies;
14192 rtx cmp_op0, cmp_op1;
14193 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
14197 cmp_op0 = operands[0];
14198 cmp_op1 = operands[1];
14202 cmp_op0 = operands[1];
14203 cmp_op1 = operands[2];
14208 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
14209 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
14210 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
14211 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
14213 if (GET_MODE (operands[0]) == SFmode)
14215 return &ucomiss[TARGET_AVX ? 0 : 1];
14217 return &comiss[TARGET_AVX ? 0 : 1];
14220 return &ucomisd[TARGET_AVX ? 0 : 1];
14222 return &comisd[TARGET_AVX ? 0 : 1];
14225 gcc_assert (STACK_TOP_P (cmp_op0));
14227 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
14229 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
14231 if (stack_top_dies)
14233 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
14234 return output_387_ffreep (operands, 1);
14237 return "ftst\n\tfnstsw\t%0";
14240 if (STACK_REG_P (cmp_op1)
14242 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
14243 && REGNO (cmp_op1) != FIRST_STACK_REG)
14245 /* If the top of the 387 stack dies, and the other operand is
14246 also a stack register that dies, then this must be a
14247 `fcompp' float compare. */
14251 /* There is no double popping fcomi variant. Fortunately,
14252 eflags is immune from the fstp's cc clobbering. */
14254 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
14256 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
14257 return output_387_ffreep (operands, 0);
14262 return "fucompp\n\tfnstsw\t%0";
14264 return "fcompp\n\tfnstsw\t%0";
14269 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
14271 static const char * const alt[16] =
14273 "fcom%Z2\t%y2\n\tfnstsw\t%0",
14274 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
14275 "fucom%Z2\t%y2\n\tfnstsw\t%0",
14276 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
14278 "ficom%Z2\t%y2\n\tfnstsw\t%0",
14279 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
14283 "fcomi\t{%y1, %0|%0, %y1}",
14284 "fcomip\t{%y1, %0|%0, %y1}",
14285 "fucomi\t{%y1, %0|%0, %y1}",
14286 "fucomip\t{%y1, %0|%0, %y1}",
14297 mask = eflags_p << 3;
14298 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
14299 mask |= unordered_p << 1;
14300 mask |= stack_top_dies;
14302 gcc_assert (mask < 16);
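/* Worked example of the encoding: fcomip has eflags_p set, compares a
   float, is ordered, and pops the dying %st(0), so the mask is
   8 | 0 | 0 | 1 = 9, which selects the "fcomip" template in alt[].  */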
14311 ix86_output_addr_vec_elt (FILE *file, int value)
14313 const char *directive = ASM_LONG;
14317 directive = ASM_QUAD;
14319 gcc_assert (!TARGET_64BIT);
14322 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
14326 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
14328 const char *directive = ASM_LONG;
14331 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
14332 directive = ASM_QUAD;
14334 gcc_assert (!TARGET_64BIT);
14336 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
14337 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
14338 fprintf (file, "%s%s%d-%s%d\n",
14339 directive, LPREFIX, value, LPREFIX, rel);
14340 else if (HAVE_AS_GOTOFF_IN_DATA)
14341 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
14343 else if (TARGET_MACHO)
14345 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
14346 machopic_output_function_base_name (file);
14351 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
14352 GOT_SYMBOL_NAME, LPREFIX, value);
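/* Sketch of the emitted directives (assuming LPREFIX is ".L" and the
   element label number is 42, relative label 7):
     64-bit or VxWorks RTP:   .long .L42-.L7
     @GOTOFF allowed in data: .long .L42@GOTOFF
     fallback:                .long _GLOBAL_OFFSET_TABLE_+[.-.L42]  */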
14355 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target. */
14359 ix86_expand_clear (rtx dest)
14363 /* We play register width games, which are only valid after reload. */
14364 gcc_assert (reload_completed);
14366 /* Avoid HImode and its attendant prefix byte. */
14367 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
14368 dest = gen_rtx_REG (SImode, REGNO (dest));
14369 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
14371 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
14372 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
14374 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14375 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
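/* Size rationale: "xorl %eax, %eax" is 2 bytes where "movl $0, %eax"
   is 5, but the xor form clobbers the flags, which is why the PARALLEL
   above attaches an explicit FLAGS_REG clobber.  */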
14381 /* X is an unchanging MEM. If it is a constant pool reference, return
14382 the constant pool rtx, else NULL. */
14385 maybe_get_pool_constant (rtx x)
14387 x = ix86_delegitimize_address (XEXP (x, 0));
14389 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
14390 return get_pool_constant (x);
14396 ix86_expand_move (enum machine_mode mode, rtx operands[])
14399 enum tls_model model;
14404 if (GET_CODE (op1) == SYMBOL_REF)
14406 model = SYMBOL_REF_TLS_MODEL (op1);
14409 op1 = legitimize_tls_address (op1, model, true);
14410 op1 = force_operand (op1, op0);
14414 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14415 && SYMBOL_REF_DLLIMPORT_P (op1))
14416 op1 = legitimize_dllimport_symbol (op1, false);
14418 else if (GET_CODE (op1) == CONST
14419 && GET_CODE (XEXP (op1, 0)) == PLUS
14420 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
14422 rtx addend = XEXP (XEXP (op1, 0), 1);
14423 rtx symbol = XEXP (XEXP (op1, 0), 0);
14426 model = SYMBOL_REF_TLS_MODEL (symbol);
14428 tmp = legitimize_tls_address (symbol, model, true);
14429 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
14430 && SYMBOL_REF_DLLIMPORT_P (symbol))
14431 tmp = legitimize_dllimport_symbol (symbol, true);
14435 tmp = force_operand (tmp, NULL);
14436 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
14437 op0, 1, OPTAB_DIRECT);
14443 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
14445 if (TARGET_MACHO && !TARGET_64BIT)
14450 rtx temp = ((reload_in_progress
14451 || ((op0 && REG_P (op0))
14453 ? op0 : gen_reg_rtx (Pmode));
14454 op1 = machopic_indirect_data_reference (op1, temp);
14455 op1 = machopic_legitimize_pic_address (op1, mode,
14456 temp == op1 ? 0 : temp);
14458 else if (MACHOPIC_INDIRECT)
14459 op1 = machopic_indirect_data_reference (op1, 0);
14467 op1 = force_reg (Pmode, op1);
14468 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14470 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14471 op1 = legitimize_pic_address (op1, reg);
14480 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14481 || !push_operand (op0, mode))
14483 op1 = force_reg (mode, op1);
14485 if (push_operand (op0, mode)
14486 && ! general_no_elim_operand (op1, mode))
14487 op1 = copy_to_mode_reg (mode, op1);
14489 /* Force large constants in 64bit compilation into a register
14490 to get them CSEed. */
14491 if (can_create_pseudo_p ()
14492 && (mode == DImode) && TARGET_64BIT
14493 && immediate_operand (op1, mode)
14494 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14495 && !register_operand (op0, mode)
14497 op1 = copy_to_mode_reg (mode, op1);
14499 if (can_create_pseudo_p ()
14500 && FLOAT_MODE_P (mode)
14501 && GET_CODE (op1) == CONST_DOUBLE)
14503 /* If we are loading a floating point constant to a register,
14504 force the value to memory now, since we'll get better code
14505 out the back end. */
14507 op1 = validize_mem (force_const_mem (mode, op1));
14508 if (!register_operand (op0, mode))
14510 rtx temp = gen_reg_rtx (mode);
14511 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14512 emit_move_insn (op0, temp);
14518 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14522 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14524 rtx op0 = operands[0], op1 = operands[1];
14525 unsigned int align = GET_MODE_ALIGNMENT (mode);
14527 /* Force constants other than zero into memory. We do not know how
14528 the instructions used to build constants modify the upper 64 bits
14529 of the register; once we have that information we may be able
14530 to handle some of them more efficiently. */
14531 if (can_create_pseudo_p ()
14532 && register_operand (op0, mode)
14533 && (CONSTANT_P (op1)
14534 || (GET_CODE (op1) == SUBREG
14535 && CONSTANT_P (SUBREG_REG (op1))))
14536 && !standard_sse_constant_p (op1))
14537 op1 = validize_mem (force_const_mem (mode, op1));
14539 /* We need to check memory alignment for SSE modes since attributes
14540 can make operands unaligned. */
14541 if (can_create_pseudo_p ()
14542 && SSE_REG_MODE_P (mode)
14543 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14544 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14548 /* ix86_expand_vector_move_misalign() does not like constants ... */
14549 if (CONSTANT_P (op1)
14550 || (GET_CODE (op1) == SUBREG
14551 && CONSTANT_P (SUBREG_REG (op1))))
14552 op1 = validize_mem (force_const_mem (mode, op1));
14554 /* ... nor both arguments in memory. */
14555 if (!register_operand (op0, mode)
14556 && !register_operand (op1, mode))
14557 op1 = force_reg (mode, op1);
14559 tmp[0] = op0; tmp[1] = op1;
14560 ix86_expand_vector_move_misalign (mode, tmp);
14564 /* Make operand1 a register if it isn't already. */
14565 if (can_create_pseudo_p ()
14566 && !register_operand (op0, mode)
14567 && !register_operand (op1, mode))
14569 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14573 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14576 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14577 straight to ix86_expand_vector_move. */
14578 /* Code generation for scalar reg-reg moves of single and double precision data:
14579 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14583 if (x86_sse_partial_reg_dependency == true)
14588 Code generation for scalar loads of double precision data:
14589 if (x86_sse_split_regs == true)
14590 movlpd mem, reg (gas syntax)
14594 Code generation for unaligned packed loads of single precision data
14595 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14596 if (x86_sse_unaligned_move_optimal)
14599 if (x86_sse_partial_reg_dependency == true)
14611 Code generation for unaligned packed loads of double precision data
14612 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14613 if (x86_sse_unaligned_move_optimal)
14616 if (x86_sse_split_regs == true)
14629 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14638 switch (GET_MODE_CLASS (mode))
14640 case MODE_VECTOR_INT:
14642 switch (GET_MODE_SIZE (mode))
14645 /* If we're optimizing for size, movups is the smallest. */
14646 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14648 op0 = gen_lowpart (V4SFmode, op0);
14649 op1 = gen_lowpart (V4SFmode, op1);
14650 emit_insn (gen_avx_movups (op0, op1));
14653 op0 = gen_lowpart (V16QImode, op0);
14654 op1 = gen_lowpart (V16QImode, op1);
14655 emit_insn (gen_avx_movdqu (op0, op1));
14658 op0 = gen_lowpart (V32QImode, op0);
14659 op1 = gen_lowpart (V32QImode, op1);
14660 emit_insn (gen_avx_movdqu256 (op0, op1));
14663 gcc_unreachable ();
14666 case MODE_VECTOR_FLOAT:
14667 op0 = gen_lowpart (mode, op0);
14668 op1 = gen_lowpart (mode, op1);
14673 emit_insn (gen_avx_movups (op0, op1));
14676 emit_insn (gen_avx_movups256 (op0, op1));
14679 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14681 op0 = gen_lowpart (V4SFmode, op0);
14682 op1 = gen_lowpart (V4SFmode, op1);
14683 emit_insn (gen_avx_movups (op0, op1));
14686 emit_insn (gen_avx_movupd (op0, op1));
14689 emit_insn (gen_avx_movupd256 (op0, op1));
14692 gcc_unreachable ();
14697 gcc_unreachable ();
14705 /* If we're optimizing for size, movups is the smallest. */
14706 if (optimize_insn_for_size_p ()
14707 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14709 op0 = gen_lowpart (V4SFmode, op0);
14710 op1 = gen_lowpart (V4SFmode, op1);
14711 emit_insn (gen_sse_movups (op0, op1));
14715 /* ??? If we have typed data, then it would appear that using
14716 movdqu is the only way to get unaligned data loaded with integer registers. */
14718 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14720 op0 = gen_lowpart (V16QImode, op0);
14721 op1 = gen_lowpart (V16QImode, op1);
14722 emit_insn (gen_sse2_movdqu (op0, op1));
14726 if (TARGET_SSE2 && mode == V2DFmode)
14730 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14732 op0 = gen_lowpart (V2DFmode, op0);
14733 op1 = gen_lowpart (V2DFmode, op1);
14734 emit_insn (gen_sse2_movupd (op0, op1));
14738 /* When SSE registers are split into halves, we can avoid
14739 writing to the top half twice. */
14740 if (TARGET_SSE_SPLIT_REGS)
14742 emit_clobber (op0);
14747 /* ??? Not sure about the best option for the Intel chips.
14748 The following would seem to satisfy; the register is
14749 entirely cleared, breaking the dependency chain. We
14750 then store to the upper half, with a dependency depth
14751 of one. A rumor has it that Intel recommends two movsd
14752 followed by an unpacklpd, but this is unconfirmed. And
14753 given that the dependency depth of the unpacklpd would
14754 still be one, I'm not sure why this would be better. */
14755 zero = CONST0_RTX (V2DFmode);
14758 m = adjust_address (op1, DFmode, 0);
14759 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14760 m = adjust_address (op1, DFmode, 8);
14761 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14765 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14767 op0 = gen_lowpart (V4SFmode, op0);
14768 op1 = gen_lowpart (V4SFmode, op1);
14769 emit_insn (gen_sse_movups (op0, op1));
14773 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14774 emit_move_insn (op0, CONST0_RTX (mode));
14776 emit_clobber (op0);
14778 if (mode != V4SFmode)
14779 op0 = gen_lowpart (V4SFmode, op0);
14780 m = adjust_address (op1, V2SFmode, 0);
14781 emit_insn (gen_sse_loadlps (op0, op0, m));
14782 m = adjust_address (op1, V2SFmode, 8);
14783 emit_insn (gen_sse_loadhps (op0, op0, m));
14786 else if (MEM_P (op0))
14788 /* If we're optimizing for size, movups is the smallest. */
14789 if (optimize_insn_for_size_p ()
14790 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14792 op0 = gen_lowpart (V4SFmode, op0);
14793 op1 = gen_lowpart (V4SFmode, op1);
14794 emit_insn (gen_sse_movups (op0, op1));
14798 /* ??? Similar to above, only less clear because of quote
14799 typeless stores unquote. */
14800 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14801 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14803 op0 = gen_lowpart (V16QImode, op0);
14804 op1 = gen_lowpart (V16QImode, op1);
14805 emit_insn (gen_sse2_movdqu (op0, op1));
14809 if (TARGET_SSE2 && mode == V2DFmode)
14811 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14813 op0 = gen_lowpart (V2DFmode, op0);
14814 op1 = gen_lowpart (V2DFmode, op1);
14815 emit_insn (gen_sse2_movupd (op0, op1));
14819 m = adjust_address (op0, DFmode, 0);
14820 emit_insn (gen_sse2_storelpd (m, op1));
14821 m = adjust_address (op0, DFmode, 8);
14822 emit_insn (gen_sse2_storehpd (m, op1));
14827 if (mode != V4SFmode)
14828 op1 = gen_lowpart (V4SFmode, op1);
14830 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14832 op0 = gen_lowpart (V4SFmode, op0);
14833 emit_insn (gen_sse_movups (op0, op1));
14837 m = adjust_address (op0, V2SFmode, 0);
14838 emit_insn (gen_sse_storelps (m, op1));
14839 m = adjust_address (op0, V2SFmode, 8);
14840 emit_insn (gen_sse_storehps (m, op1));
14845 gcc_unreachable ();
14848 /* Expand a push in MODE. This is some mode for which we do not support
14849 proper push instructions, at least from the registers that we expect
14850 the value to live in. */
14853 ix86_expand_push (enum machine_mode mode, rtx x)
14857 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14858 GEN_INT (-GET_MODE_SIZE (mode)),
14859 stack_pointer_rtx, 1, OPTAB_DIRECT);
14860 if (tmp != stack_pointer_rtx)
14861 emit_move_insn (stack_pointer_rtx, tmp);
14863 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14865 /* When we push an operand onto the stack, it has to be aligned at
14866 least at the function argument boundary. However, since we don't have
14867 the argument type, we can't determine the actual argument boundary. */
14869 emit_move_insn (tmp, x);
14872 /* Helper function of ix86_fixup_binary_operands to canonicalize
14873 operand order. Returns true if the operands should be swapped. */
14876 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14879 rtx dst = operands[0];
14880 rtx src1 = operands[1];
14881 rtx src2 = operands[2];
14883 /* If the operation is not commutative, we can't do anything. */
14884 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14887 /* Highest priority is that src1 should match dst. */
14888 if (rtx_equal_p (dst, src1))
14890 if (rtx_equal_p (dst, src2))
14893 /* Next highest priority is that immediate constants come second. */
14894 if (immediate_operand (src2, mode))
14896 if (immediate_operand (src1, mode))
14899 /* Lowest priority is that memory references should come second. */
14909 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14910 destination to use for the operation. If different from the true
14911 destination in operands[0], a copy operation will be required. */
14914 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14917 rtx dst = operands[0];
14918 rtx src1 = operands[1];
14919 rtx src2 = operands[2];
14921 /* Canonicalize operand order. */
14922 if (ix86_swap_binary_operands_p (code, mode, operands))
14926 /* It is invalid to swap operands of different modes. */
14927 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14934 /* Both source operands cannot be in memory. */
14935 if (MEM_P (src1) && MEM_P (src2))
14937 /* Optimization: Only read from memory once. */
14938 if (rtx_equal_p (src1, src2))
14940 src2 = force_reg (mode, src2);
14944 src2 = force_reg (mode, src2);
14947 /* If the destination is memory, and we do not have matching source
14948 operands, do things in registers. */
14949 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14950 dst = gen_reg_rtx (mode);
14952 /* Source 1 cannot be a constant. */
14953 if (CONSTANT_P (src1))
14954 src1 = force_reg (mode, src1);
14956 /* Source 1 cannot be a non-matching memory. */
14957 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14958 src1 = force_reg (mode, src1);
14960 operands[1] = src1;
14961 operands[2] = src2;
14965 /* Similarly, but assume that the destination has already been
14966 set up properly. */
14969 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14970 enum machine_mode mode, rtx operands[])
14972 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14973 gcc_assert (dst == operands[0]);
14976 /* Attempt to expand a binary operator. Make the expansion closer to the
14977 actual machine than just general_operand, which will allow 3 separate
14978 memory references (one output, two input) in a single insn. */
14981 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14984 rtx src1, src2, dst, op, clob;
14986 dst = ix86_fixup_binary_operands (code, mode, operands);
14987 src1 = operands[1];
14988 src2 = operands[2];
14990 /* Emit the instruction. */
14992 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14993 if (reload_in_progress)
14995 /* Reload doesn't know about the flags register, and doesn't know that
14996 it doesn't want to clobber it. We can only do this with PLUS. */
14997 gcc_assert (code == PLUS);
15002 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15003 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15006 /* Fix up the destination if needed. */
15007 if (dst != operands[0])
15008 emit_move_insn (operands[0], dst);
15011 /* Return TRUE or FALSE depending on whether the binary operator meets the
15012 appropriate constraints. */
15015 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
15018 rtx dst = operands[0];
15019 rtx src1 = operands[1];
15020 rtx src2 = operands[2];
15022 /* Both source operands cannot be in memory. */
15023 if (MEM_P (src1) && MEM_P (src2))
15026 /* Canonicalize operand order for commutative operators. */
15027 if (ix86_swap_binary_operands_p (code, mode, operands))
15034 /* If the destination is memory, we must have a matching source operand. */
15035 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
15038 /* Source 1 cannot be a constant. */
15039 if (CONSTANT_P (src1))
15042 /* Source 1 cannot be a non-matching memory. */
15043 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
15045 /* Support "andhi/andsi/anddi" as a zero-extending move. */
15046 return (code == AND
15049 || (TARGET_64BIT && mode == DImode))
15050 && CONST_INT_P (src2)
15051 && (INTVAL (src2) == 0xff
15052 || INTVAL (src2) == 0xffff));
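/* Illustration: an AND of a memory operand with 0xff whose destination
   is a register can be emitted as "movzbl mem, %reg", so this pattern
   is accepted even though src1 does not match the destination.  */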
15058 /* Attempt to expand a unary operator. Make the expansion closer to the
15059 actual machine than just general_operand, which will allow 2 separate
15060 memory references (one output, one input) in a single insn. */
15063 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
15066 int matching_memory;
15067 rtx src, dst, op, clob;
15072 /* If the destination is memory, and we do not have matching source
15073 operands, do things in registers. */
15074 matching_memory = 0;
15077 if (rtx_equal_p (dst, src))
15078 matching_memory = 1;
15080 dst = gen_reg_rtx (mode);
15083 /* When source operand is memory, destination must match. */
15084 if (MEM_P (src) && !matching_memory)
15085 src = force_reg (mode, src);
15087 /* Emit the instruction. */
15089 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
15090 if (reload_in_progress || code == NOT)
15092 /* Reload doesn't know about the flags register, and doesn't know that
15093 it doesn't want to clobber it. */
15094 gcc_assert (code == NOT);
15099 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15100 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
15103 /* Fix up the destination if needed. */
15104 if (dst != operands[0])
15105 emit_move_insn (operands[0], dst);
15108 /* Split 32bit/64bit divmod with 8bit unsigned divmod if the dividend
15109 and divisor are within the range [0-255]. */
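/* Shape of the emitted sequence, as illustrative AT&T assembly for the
   SImode case (register names are placeholders):

       movl    <dividend>, %scratch
       orl     <divisor>, %scratch
       testl   $-0x100, %scratch       # any bit above bit 7 set?
       je      .Lqimode
       <full-width signed/unsigned divide>
       jmp     .Lend
   .Lqimode:
       <8-bit unsigned divide; AL = quotient, AH = remainder>
   .Lend:                                                        */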
15112 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
15115 rtx end_label, qimode_label;
15116 rtx insn, div, mod;
15117 rtx scratch, tmp0, tmp1, tmp2;
15118 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
15119 rtx (*gen_zero_extend) (rtx, rtx);
15120 rtx (*gen_test_ccno_1) (rtx, rtx);
15125 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
15126 gen_test_ccno_1 = gen_testsi_ccno_1;
15127 gen_zero_extend = gen_zero_extendqisi2;
15130 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
15131 gen_test_ccno_1 = gen_testdi_ccno_1;
15132 gen_zero_extend = gen_zero_extendqidi2;
15135 gcc_unreachable ();
15138 end_label = gen_label_rtx ();
15139 qimode_label = gen_label_rtx ();
15141 scratch = gen_reg_rtx (mode);
15143 /* Use 8bit unsigned divmod if dividend and divisor are within
15144 the range [0-255]. */
15145 emit_move_insn (scratch, operands[2]);
15146 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
15147 scratch, 1, OPTAB_DIRECT);
15148 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
15149 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
15150 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
15151 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
15152 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
15154 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
15155 predict_jump (REG_BR_PROB_BASE * 50 / 100);
15156 JUMP_LABEL (insn) = qimode_label;
15158 /* Generate original signed/unsigned divmod. */
15159 div = gen_divmod4_1 (operands[0], operands[1],
15160 operands[2], operands[3]);
15163 /* Branch to the end. */
15164 emit_jump_insn (gen_jump (end_label));
15167 /* Generate 8bit unsigned divide. */
15168 emit_label (qimode_label);
15169 /* Don't use operands[0] for result of 8bit divide since not all
15170 registers support QImode ZERO_EXTRACT. */
15171 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
15172 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
15173 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
15174 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
15178 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
15179 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
15183 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
15184 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
15187 /* Extract remainder from AH. */
15188 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
15189 if (REG_P (operands[1]))
15190 insn = emit_move_insn (operands[1], tmp1);
15193 /* Need a new scratch register since the old one has result
15195 scratch = gen_reg_rtx (mode);
15196 emit_move_insn (scratch, tmp1);
15197 insn = emit_move_insn (operands[1], scratch);
15199 set_unique_reg_note (insn, REG_EQUAL, mod);
15201 /* Zero extend quotient from AL. */
15202 tmp1 = gen_lowpart (QImode, tmp0);
15203 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
15204 set_unique_reg_note (insn, REG_EQUAL, div);
15206 emit_label (end_label);
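/* The emitted sequence is roughly:

       mov    op2, scratch
       or     op3, scratch          ; both fit in 8 bits iff
       test   $-0x100, scratch      ; (op2|op3) < 0x100
       je     qimode_label
       (i)div ...                   ; full-width divide
       jmp    end_label
   qimode_label:
       divb   ...                   ; 8-bit unsigned divide
       ... unpack AL (quotient) and AH (remainder) ...
   end_label:                                                  */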
15209 #define LEA_SEARCH_THRESHOLD 12
15211 /* Search backward for non-agu definition of register number REGNO1
15212 or register number REGNO2 in INSN's basic block until
15213 1. Pass LEA_SEARCH_THRESHOLD instructions, or
15214 2. Reach BB boundary, or
15215 3. Reach agu definition.
15216 Returns the distance between the non-agu definition point and INSN.
15217 If no definition point, returns -1. */
15220 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
15223 basic_block bb = BLOCK_FOR_INSN (insn);
15226 enum attr_type insn_type;
15228 if (insn != BB_HEAD (bb))
15230 rtx prev = PREV_INSN (insn);
15231 while (prev && distance < LEA_SEARCH_THRESHOLD)
15233 if (NONDEBUG_INSN_P (prev))
15236 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15237 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15238 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15239 && (regno1 == DF_REF_REGNO (*def_rec)
15240 || regno2 == DF_REF_REGNO (*def_rec)))
15242 insn_type = get_attr_type (prev);
15243 if (insn_type != TYPE_LEA)
15247 if (prev == BB_HEAD (bb))
15249 prev = PREV_INSN (prev);
15253 if (distance < LEA_SEARCH_THRESHOLD)
15257 bool simple_loop = false;
15259 FOR_EACH_EDGE (e, ei, bb->preds)
15262 simple_loop = true;
15268 rtx prev = BB_END (bb);
15271 && distance < LEA_SEARCH_THRESHOLD)
15273 if (NONDEBUG_INSN_P (prev))
15276 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
15277 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15278 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15279 && (regno1 == DF_REF_REGNO (*def_rec)
15280 || regno2 == DF_REF_REGNO (*def_rec)))
15282 insn_type = get_attr_type (prev);
15283 if (insn_type != TYPE_LEA)
15287 prev = PREV_INSN (prev);
15295 /* get_attr_type may modify recog data. We want to make sure
15296 that recog data is valid for instruction INSN, on which
15297 distance_non_agu_define is called. INSN is unchanged here. */
15298 extract_insn_cached (insn);
15302 /* Return the distance between INSN and the next insn that uses
15303 register number REGNO0 in memory address. Return -1 if no such
15304 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
15307 distance_agu_use (unsigned int regno0, rtx insn)
15309 basic_block bb = BLOCK_FOR_INSN (insn);
15314 if (insn != BB_END (bb))
15316 rtx next = NEXT_INSN (insn);
15317 while (next && distance < LEA_SEARCH_THRESHOLD)
15319 if (NONDEBUG_INSN_P (next))
15323 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15324 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15325 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15326 && regno0 == DF_REF_REGNO (*use_rec))
15328 /* Return DISTANCE if OP0 is used in memory
15329 address in NEXT. */
15333 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15334 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15335 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15336 && regno0 == DF_REF_REGNO (*def_rec))
15338 /* Return -1 if OP0 is set in NEXT. */
15342 if (next == BB_END (bb))
15344 next = NEXT_INSN (next);
15348 if (distance < LEA_SEARCH_THRESHOLD)
15352 bool simple_loop = false;
15354 FOR_EACH_EDGE (e, ei, bb->succs)
15357 simple_loop = true;
15363 rtx next = BB_HEAD (bb);
15366 && distance < LEA_SEARCH_THRESHOLD)
15368 if (NONDEBUG_INSN_P (next))
15372 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
15373 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
15374 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
15375 && regno0 == DF_REF_REGNO (*use_rec))
15377 /* Return DISTANCE if OP0 is used in memory
15378 address in NEXT. */
15382 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
15383 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
15384 && !DF_REF_IS_ARTIFICIAL (*def_rec)
15385 && regno0 == DF_REF_REGNO (*def_rec))
15387 /* Return -1 if OP0 is set in NEXT. */
15392 next = NEXT_INSN (next);
15400 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
15401 there is a dilemma of choosing LEA or ADD.
15402 Negative value: ADD is preferred over LEA.
15404 Positive value: LEA is preferred over ADD. */
15405 #define IX86_LEA_PRIORITY 2
15407 /* Return true if it is ok to optimize an ADD operation to LEA
15408 operation to avoid flag register consumption. For most processors,
15409 ADD is faster than LEA. For processors like ATOM, if the
15410 destination register of LEA holds an actual address which will be
15411 used soon, LEA is better; otherwise ADD is better. */
15414 ix86_lea_for_add_ok (rtx insn, rtx operands[])
15416 unsigned int regno0 = true_regnum (operands[0]);
15417 unsigned int regno1 = true_regnum (operands[1]);
15418 unsigned int regno2 = true_regnum (operands[2]);
15420 /* If a = b + c with a != b and a != c, we must use the lea form. */
15421 if (regno0 != regno1 && regno0 != regno2)
15424 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
15428 int dist_define, dist_use;
15430 /* Return false if REGNO0 isn't used in memory address. */
15431 dist_use = distance_agu_use (regno0, insn);
15435 dist_define = distance_non_agu_define (regno1, regno2, insn);
15436 if (dist_define <= 0)
15439 /* If this insn has both backward non-agu dependence and forward
15440 agu dependence, the one with the shorter distance takes effect. */
15441 if ((dist_define + IX86_LEA_PRIORITY) < dist_use)
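  /* Example: with IX86_LEA_PRIORITY == 2, a non-AGU definition 3 insns
     back (dist_define == 3) and an address use 6 insns ahead
     (dist_use == 6) satisfies 3 + 2 < 6, so the LEA form is chosen.  */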
15448 /* Return true if destination reg of SET_BODY is shift count of USE_BODY. */
15452 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
15458 /* Retrieve destination of SET_BODY. */
15459 switch (GET_CODE (set_body))
15462 set_dest = SET_DEST (set_body);
15463 if (!set_dest || !REG_P (set_dest))
15467 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
15468 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
15476 /* Retrieve shift count of USE_BODY. */
15477 switch (GET_CODE (use_body))
15480 shift_rtx = XEXP (use_body, 1);
15483 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
15484 if (ix86_dep_by_shift_count_body (set_body,
15485 XVECEXP (use_body, 0, i)))
15493 && (GET_CODE (shift_rtx) == ASHIFT
15494 || GET_CODE (shift_rtx) == LSHIFTRT
15495 || GET_CODE (shift_rtx) == ASHIFTRT
15496 || GET_CODE (shift_rtx) == ROTATE
15497 || GET_CODE (shift_rtx) == ROTATERT))
15499 rtx shift_count = XEXP (shift_rtx, 1);
15501 /* Return true if shift count is dest of SET_BODY. */
15502 if (REG_P (shift_count)
15503 && true_regnum (set_dest) == true_regnum (shift_count))
15510 /* Return true if destination reg of SET_INSN is shift count of USE_INSN. */
15514 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
15516 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
15517 PATTERN (use_insn));
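/* E.g. for the pair "mov %eax, %ecx" / "sall %cl, %edx" the first insn's
   destination feeds the second insn's shift count, so this returns true.  */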
15520 /* Return TRUE or FALSE depending on whether the unary operator meets the
15521 appropriate constraints. */
15524 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
15525 enum machine_mode mode ATTRIBUTE_UNUSED,
15526 rtx operands[2] ATTRIBUTE_UNUSED)
15528 /* If one of operands is memory, source and destination must match. */
15529 if ((MEM_P (operands[0])
15530 || MEM_P (operands[1]))
15531 && ! rtx_equal_p (operands[0], operands[1]))
15536 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
15537 are ok, keeping in mind the possible movddup alternative. */
15540 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
15542 if (MEM_P (operands[0]))
15543 return rtx_equal_p (operands[0], operands[1 + high]);
15544 if (MEM_P (operands[1]) && MEM_P (operands[2]))
15545 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
15549 /* Post-reload splitter for converting an SF or DFmode value in an
15550 SSE register into an unsigned SImode. */
15553 ix86_split_convert_uns_si_sse (rtx operands[])
15555 enum machine_mode vecmode;
15556 rtx value, large, zero_or_two31, input, two31, x;
15558 large = operands[1];
15559 zero_or_two31 = operands[2];
15560 input = operands[3];
15561 two31 = operands[4];
15562 vecmode = GET_MODE (large);
15563 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15565 /* Load up the value into the low element. We must ensure that the other
15566 elements are valid floats -- zero is the easiest such value. */
15569 if (vecmode == V4SFmode)
15570 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15572 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15576 input = gen_rtx_REG (vecmode, REGNO (input));
15577 emit_move_insn (value, CONST0_RTX (vecmode));
15578 if (vecmode == V4SFmode)
15579 emit_insn (gen_sse_movss (value, value, input));
15581 emit_insn (gen_sse2_movsd (value, value, input));
15584 emit_move_insn (large, two31);
15585 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15587 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15588 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15590 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15591 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15593 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15594 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15596 large = gen_rtx_REG (V4SImode, REGNO (large));
15597 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15599 x = gen_rtx_REG (V4SImode, REGNO (value));
15600 if (vecmode == V4SFmode)
15601 emit_insn (gen_sse2_cvttps2dq (x, value));
15603 emit_insn (gen_sse2_cvttpd2dq (x, value));
15606 emit_insn (gen_xorv4si3 (value, value, large));
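/* In effect this computes

     (unsigned) x = x < 0x1p31 ? (int) x
                               : ((int) (x - 0x1p31)) ^ 0x80000000

   with the compare, subtract and final xor all done lane-wise in the
   vector domain.  */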
15609 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15610 Expects the 64-bit DImode to be supplied in a pair of integral
15611 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15612 -mfpmath=sse, !optimize_size only. */
15615 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15617 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15618 rtx int_xmm, fp_xmm;
15619 rtx biases, exponents;
15622 int_xmm = gen_reg_rtx (V4SImode);
15623 if (TARGET_INTER_UNIT_MOVES)
15624 emit_insn (gen_movdi_to_sse (int_xmm, input));
15625 else if (TARGET_SSE_SPLIT_REGS)
15627 emit_clobber (int_xmm);
15628 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15632 x = gen_reg_rtx (V2DImode);
15633 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15634 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15637 x = gen_rtx_CONST_VECTOR (V4SImode,
15638 gen_rtvec (4, GEN_INT (0x43300000UL),
15639 GEN_INT (0x45300000UL),
15640 const0_rtx, const0_rtx));
15641 exponents = validize_mem (force_const_mem (V4SImode, x));
15643 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15644 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15646 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
15647 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15648 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15649 (0x1.0p84 + double(fp_value_hi_xmm)).
15650 Note these exponents differ by 32. */
15652 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15654 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15655 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15656 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15657 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15658 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15659 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15660 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15661 biases = validize_mem (force_const_mem (V2DFmode, biases));
15662 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15664 /* Add the upper and lower DFmode values together. */
15666 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15669 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15670 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15671 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15674 ix86_expand_vector_extract (false, target, fp_xmm, 0);
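/* Worked example: input = 2^32 + 5, so lo = 5 and hi = 1:
     low  half: (0x1.0p52 + 5)        - 0x1.0p52 = 5.0
     high half: (0x1.0p84 + 1 * 2^32) - 0x1.0p84 = 4294967296.0
   and the final add yields 4294967301.0, exactly the input.  */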
15677 /* Not used, but eases macroization of patterns. */
15679 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15680 rtx input ATTRIBUTE_UNUSED)
15682 gcc_unreachable ();
15685 /* Convert an unsigned SImode value into a DFmode. Only currently used
15686 for SSE, but applicable anywhere. */
15689 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15691 REAL_VALUE_TYPE TWO31r;
15694 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15695 NULL, 1, OPTAB_DIRECT);
15697 fp = gen_reg_rtx (DFmode);
15698 emit_insn (gen_floatsidf2 (fp, x));
15700 real_ldexp (&TWO31r, &dconst1, 31);
15701 x = const_double_from_real_value (TWO31r, DFmode);
15703 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15705 emit_move_insn (target, x);
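/* I.e. compute (double) (int) (x ^ 0x80000000) + 0x1.0p31: flipping the
   sign bit maps [0, 2^32) onto [-2^31, 2^31), which the signed
   floatsidf2 can handle, and adding 2^31 back restores the value.  */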
15708 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15709 32-bit mode; otherwise we have a direct convert instruction. */
15712 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15714 REAL_VALUE_TYPE TWO32r;
15715 rtx fp_lo, fp_hi, x;
15717 fp_lo = gen_reg_rtx (DFmode);
15718 fp_hi = gen_reg_rtx (DFmode);
15720 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15722 real_ldexp (&TWO32r, &dconst1, 32);
15723 x = const_double_from_real_value (TWO32r, DFmode);
15724 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15726 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15728 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15731 emit_move_insn (target, x);
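/* I.e. result = (double) (signed) hi * 0x1.0p32 + (double) (unsigned) lo,
   where only the high word carries the sign of the 64-bit input.  */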
15734 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15735 For x86_32, -mfpmath=sse, !optimize_size only. */
15737 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15739 REAL_VALUE_TYPE ONE16r;
15740 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15742 real_ldexp (&ONE16r, &dconst1, 16);
15743 x = const_double_from_real_value (ONE16r, SFmode);
15744 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15745 NULL, 0, OPTAB_DIRECT);
15746 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15747 NULL, 0, OPTAB_DIRECT);
15748 fp_hi = gen_reg_rtx (SFmode);
15749 fp_lo = gen_reg_rtx (SFmode);
15750 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15751 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15752 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15754 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15756 if (!rtx_equal_p (target, fp_hi))
15757 emit_move_insn (target, fp_hi);
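/* I.e. result = (float) (x >> 16) * 0x1.0p16 + (float) (x & 0xffff);
   each half is nonnegative and converts exactly, so only the final
   addition rounds.  */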
15760 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15761 then replicate the value for all elements of the vector register. */
15765 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15772 v = gen_rtvec (4, value, value, value, value);
15773 return gen_rtx_CONST_VECTOR (V4SImode, v);
15777 v = gen_rtvec (2, value, value);
15778 return gen_rtx_CONST_VECTOR (V2DImode, v);
15782 v = gen_rtvec (8, value, value, value, value,
15783 value, value, value, value);
15785 v = gen_rtvec (8, value, CONST0_RTX (SFmode),
15786 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15787 CONST0_RTX (SFmode), CONST0_RTX (SFmode),
15788 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15789 return gen_rtx_CONST_VECTOR (V8SFmode, v);
15793 v = gen_rtvec (4, value, value, value, value);
15795 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15796 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15797 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15801 v = gen_rtvec (4, value, value, value, value);
15803 v = gen_rtvec (4, value, CONST0_RTX (DFmode),
15804 CONST0_RTX (DFmode), CONST0_RTX (DFmode));
15805 return gen_rtx_CONST_VECTOR (V4DFmode, v);
15809 v = gen_rtvec (2, value, value);
15811 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15812 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15815 gcc_unreachable ();
15819 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15820 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15821 for an SSE register. If VECT is true, then replicate the mask for
15822 all elements of the vector register. If INVERT is true, then create
15823 a mask excluding the sign bit. */
15826 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15828 enum machine_mode vec_mode, imode;
15829 HOST_WIDE_INT hi, lo;
15834 /* Find the sign bit, sign extended to 2*HWI. */
15841 mode = GET_MODE_INNER (mode);
15843 lo = 0x80000000, hi = lo < 0;
15850 mode = GET_MODE_INNER (mode);
15852 if (HOST_BITS_PER_WIDE_INT >= 64)
15853 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15855 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15860 vec_mode = VOIDmode;
15861 if (HOST_BITS_PER_WIDE_INT >= 64)
15864 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15871 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15875 lo = ~lo, hi = ~hi;
15881 mask = immed_double_const (lo, hi, imode);
15883 vec = gen_rtvec (2, v, mask);
15884 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15885 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15892 gcc_unreachable ();
15896 lo = ~lo, hi = ~hi;
15898 /* Force this value into the low part of a fp vector constant. */
15899 mask = immed_double_const (lo, hi, imode);
15900 mask = gen_lowpart (mode, mask);
15902 if (vec_mode == VOIDmode)
15903 return force_reg (mode, mask);
15905 v = ix86_build_const_vector (vec_mode, vect, mask);
15906 return force_reg (vec_mode, v);
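/* E.g. for SFmode this produces the bit pattern 0x80000000 (or its
   complement 0x7fffffff when INVERT), broadcast to all elements when
   VECT is set.  */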
15909 /* Generate code for floating point ABS or NEG. */
15912 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15915 rtx mask, set, use, clob, dst, src;
15916 bool use_sse = false;
15917 bool vector_mode = VECTOR_MODE_P (mode);
15918 enum machine_mode vmode = mode;
15922 else if (mode == TFmode)
15924 else if (TARGET_SSE_MATH)
15926 use_sse = SSE_FLOAT_MODE_P (mode);
15927 if (mode == SFmode)
15929 else if (mode == DFmode)
15933 /* NEG and ABS performed with SSE use bitwise mask operations.
15934 Create the appropriate mask now. */
15936 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
15945 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15946 set = gen_rtx_SET (VOIDmode, dst, set);
15951 set = gen_rtx_fmt_e (code, mode, src);
15952 set = gen_rtx_SET (VOIDmode, dst, set);
15955 use = gen_rtx_USE (VOIDmode, mask);
15956 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15957 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15958 gen_rtvec (3, set, use, clob)));
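/* For vectors the mask is applied directly (XOR for NEG, AND for ABS);
   for scalars the neg/abs rtx is kept and the mask attached via a USE,
   leaving the SSE-vs-x87 choice to the insn patterns.  */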
15965 /* Expand a copysign operation. Special case operand 0 being a constant. */
15968 ix86_expand_copysign (rtx operands[])
15970 enum machine_mode mode, vmode;
15971 rtx dest, op0, op1, mask, nmask;
15973 dest = operands[0];
15977 mode = GET_MODE (dest);
15979 if (mode == SFmode)
15981 else if (mode == DFmode)
15986 if (GET_CODE (op0) == CONST_DOUBLE)
15988 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15990 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15991 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15993 if (mode == SFmode || mode == DFmode)
15995 if (op0 == CONST0_RTX (mode))
15996 op0 = CONST0_RTX (vmode);
15999 rtx v = ix86_build_const_vector (vmode, false, op0);
16001 op0 = force_reg (vmode, v);
16004 else if (op0 != CONST0_RTX (mode))
16005 op0 = force_reg (mode, op0);
16007 mask = ix86_build_signbit_mask (vmode, 0, 0);
16009 if (mode == SFmode)
16010 copysign_insn = gen_copysignsf3_const;
16011 else if (mode == DFmode)
16012 copysign_insn = gen_copysigndf3_const;
16014 copysign_insn = gen_copysigntf3_const;
16016 emit_insn (copysign_insn (dest, op0, op1, mask));
16020 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
16022 nmask = ix86_build_signbit_mask (vmode, 0, 1);
16023 mask = ix86_build_signbit_mask (vmode, 0, 0);
16025 if (mode == SFmode)
16026 copysign_insn = gen_copysignsf3_var;
16027 else if (mode == DFmode)
16028 copysign_insn = gen_copysigndf3_var;
16030 copysign_insn = gen_copysigntf3_var;
16032 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
16036 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
16037 be a constant, and so has already been expanded into a vector constant. */
16040 ix86_split_copysign_const (rtx operands[])
16042 enum machine_mode mode, vmode;
16043 rtx dest, op0, mask, x;
16045 dest = operands[0];
16047 mask = operands[3];
16049 mode = GET_MODE (dest);
16050 vmode = GET_MODE (mask);
16052 dest = simplify_gen_subreg (vmode, dest, mode, 0);
16053 x = gen_rtx_AND (vmode, dest, mask);
16054 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16056 if (op0 != CONST0_RTX (vmode))
16058 x = gen_rtx_IOR (vmode, dest, op0);
16059 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16063 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
16064 so we have to do two masks. */
16067 ix86_split_copysign_var (rtx operands[])
16069 enum machine_mode mode, vmode;
16070 rtx dest, scratch, op0, op1, mask, nmask, x;
16072 dest = operands[0];
16073 scratch = operands[1];
16076 nmask = operands[4];
16077 mask = operands[5];
16079 mode = GET_MODE (dest);
16080 vmode = GET_MODE (mask);
16082 if (rtx_equal_p (op0, op1))
16084 /* Shouldn't happen often (it's useless, obviously), but when it does
16085 we'd generate incorrect code if we continue below. */
16086 emit_move_insn (dest, op0);
16090 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
16092 gcc_assert (REGNO (op1) == REGNO (scratch));
16094 x = gen_rtx_AND (vmode, scratch, mask);
16095 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16098 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16099 x = gen_rtx_NOT (vmode, dest);
16100 x = gen_rtx_AND (vmode, x, op0);
16101 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16105 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
16107 x = gen_rtx_AND (vmode, scratch, mask);
16109 else /* alternative 2,4 */
16111 gcc_assert (REGNO (mask) == REGNO (scratch));
16112 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
16113 x = gen_rtx_AND (vmode, scratch, op1);
16115 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
16117 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
16119 dest = simplify_gen_subreg (vmode, op0, mode, 0);
16120 x = gen_rtx_AND (vmode, dest, nmask);
16122 else /* alternative 3,4 */
16124 gcc_assert (REGNO (nmask) == REGNO (dest));
16126 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
16127 x = gen_rtx_AND (vmode, dest, op0);
16129 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16132 x = gen_rtx_IOR (vmode, dest, scratch);
16133 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
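/* Net effect: dest = (op0 & ~signbit) | (op1 & signbit), i.e. the
   magnitude of OP0 combined with the sign of OP1.  */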
16136 /* Return TRUE or FALSE depending on whether the first SET in INSN
16137 has source and destination with matching CC modes, and that the
16138 CC mode is at least as constrained as REQ_MODE. */
16141 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
16144 enum machine_mode set_mode;
16146 set = PATTERN (insn);
16147 if (GET_CODE (set) == PARALLEL)
16148 set = XVECEXP (set, 0, 0);
16149 gcc_assert (GET_CODE (set) == SET);
16150 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
16152 set_mode = GET_MODE (SET_DEST (set));
16156 if (req_mode != CCNOmode
16157 && (req_mode != CCmode
16158 || XEXP (SET_SRC (set), 1) != const0_rtx))
16162 if (req_mode == CCGCmode)
16166 if (req_mode == CCGOCmode || req_mode == CCNOmode)
16170 if (req_mode == CCZmode)
16181 gcc_unreachable ();
16184 return GET_MODE (SET_SRC (set)) == set_mode;
16187 /* Generate insn patterns to do an integer compare of OPERANDS. */
16190 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
16192 enum machine_mode cmpmode;
16195 cmpmode = SELECT_CC_MODE (code, op0, op1);
16196 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
16198 /* This is very simple, but making the interface the same as in the
16199 FP case makes the rest of the code easier. */
16200 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
16201 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
16203 /* Return the test that should be put into the flags user, i.e.
16204 the bcc, scc, or cmov instruction. */
16205 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
16208 /* Figure out whether to use ordered or unordered fp comparisons.
16209 Return the appropriate mode to use. */
16212 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
16214 /* ??? In order to make all comparisons reversible, we do all comparisons
16215 non-trapping when compiling for IEEE. Once gcc is able to distinguish
16216 all forms of trapping and nontrapping comparisons, we can make inequality
16217 comparisons trapping again, since it results in better code when using
16218 FCOM based compares. */
16219 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
16223 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
16225 enum machine_mode mode = GET_MODE (op0);
16227 if (SCALAR_FLOAT_MODE_P (mode))
16229 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16230 return ix86_fp_compare_mode (code);
16235 /* Only zero flag is needed. */
16236 case EQ: /* ZF=0 */
16237 case NE: /* ZF!=0 */
16239 /* Codes needing carry flag. */
16240 case GEU: /* CF=0 */
16241 case LTU: /* CF=1 */
16242 /* Detect overflow checks. They need just the carry flag. */
16243 if (GET_CODE (op0) == PLUS
16244 && rtx_equal_p (op1, XEXP (op0, 0)))
16248 case GTU: /* CF=0 & ZF=0 */
16249 case LEU: /* CF=1 | ZF=1 */
16250 /* Detect overflow checks. They need just the carry flag. */
16251 if (GET_CODE (op0) == MINUS
16252 && rtx_equal_p (op1, XEXP (op0, 0)))
16256 /* Codes possibly doable only with sign flag when
16257 comparing against zero. */
16258 case GE: /* SF=OF or SF=0 */
16259 case LT: /* SF<>OF or SF=1 */
16260 if (op1 == const0_rtx)
16263 /* For other cases Carry flag is not required. */
16265 /* Codes doable only with sign flag when comparing
16266 against zero, but we miss jump instruction for it
16267 so we need to use relational tests against overflow
16268 that thus needs to be zero. */
16269 case GT: /* ZF=0 & SF=OF */
16270 case LE: /* ZF=1 | SF<>OF */
16271 if (op1 == const0_rtx)
16275 /* strcmp patterns do (use flags) and combine may ask us for a proper mode. */
16280 gcc_unreachable ();
16284 /* Return the fixed registers used for condition codes. */
16287 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
16294 /* If two condition code modes are compatible, return a condition code
16295 mode which is compatible with both. Otherwise, return VOIDmode. */
16298 static enum machine_mode
16299 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
16304 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
16307 if ((m1 == CCGCmode && m2 == CCGOCmode)
16308 || (m1 == CCGOCmode && m2 == CCGCmode))
16314 gcc_unreachable ();
16344 /* These are only compatible with themselves, which we already checked above. */
16351 /* Return a comparison we can do and that is equivalent to
16352 swap_condition (code), apart possibly from orderedness.
16353 But, never change orderedness if TARGET_IEEE_FP, returning
16354 UNKNOWN in that case if necessary. */
16356 static enum rtx_code
16357 ix86_fp_swap_condition (enum rtx_code code)
16361 case GT: /* GTU - CF=0 & ZF=0 */
16362 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
16363 case GE: /* GEU - CF=0 */
16364 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
16365 case UNLT: /* LTU - CF=1 */
16366 return TARGET_IEEE_FP ? UNKNOWN : GT;
16367 case UNLE: /* LEU - CF=1 | ZF=1 */
16368 return TARGET_IEEE_FP ? UNKNOWN : GE;
16370 return swap_condition (code);
16374 /* Return cost of comparison CODE using the best strategy for performance.
16375 All following functions use the number of instructions as a cost metric.
16376 In the future this should be tweaked to compute bytes for optimize_size and
16377 take into account performance of various instructions on various CPUs. */
16380 ix86_fp_comparison_cost (enum rtx_code code)
16384 /* The cost of code using bit-twiddling on %ah. */
16401 arith_cost = TARGET_IEEE_FP ? 5 : 4;
16405 arith_cost = TARGET_IEEE_FP ? 6 : 4;
16408 gcc_unreachable ();
16411 switch (ix86_fp_comparison_strategy (code))
16413 case IX86_FPCMP_COMI:
16414 return arith_cost > 4 ? 3 : 2;
16415 case IX86_FPCMP_SAHF:
16416 return arith_cost > 4 ? 4 : 3;
16422 /* Return the strategy to use for floating-point comparisons. We assume
16423 that fcomi is always preferable where available, since that is also true when looking at size
16424 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
16426 enum ix86_fpcmp_strategy
16427 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
16429 /* Do fcomi/sahf based test when profitable. */
16432 return IX86_FPCMP_COMI;
16434 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
16435 return IX86_FPCMP_SAHF;
16437 return IX86_FPCMP_ARITH;
16440 /* Swap, force into registers, or otherwise massage the two operands
16441 to a fp comparison. The operands are updated in place; the new
16442 comparison code is returned. */
16444 static enum rtx_code
16445 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
16447 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
16448 rtx op0 = *pop0, op1 = *pop1;
16449 enum machine_mode op_mode = GET_MODE (op0);
16450 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
16452 /* All of the unordered compare instructions only work on registers.
16453 The same is true of the fcomi compare instructions. The XFmode
16454 compare instructions require registers except when comparing
16455 against zero or when converting operand 1 from fixed point to floating point. */
16459 && (fpcmp_mode == CCFPUmode
16460 || (op_mode == XFmode
16461 && ! (standard_80387_constant_p (op0) == 1
16462 || standard_80387_constant_p (op1) == 1)
16463 && GET_CODE (op1) != FLOAT)
16464 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
16466 op0 = force_reg (op_mode, op0);
16467 op1 = force_reg (op_mode, op1);
16471 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
16472 things around if they appear profitable, otherwise force op0
16473 into a register. */
16475 if (standard_80387_constant_p (op0) == 0
16477 && ! (standard_80387_constant_p (op1) == 0
16480 enum rtx_code new_code = ix86_fp_swap_condition (code);
16481 if (new_code != UNKNOWN)
16484 tmp = op0, op0 = op1, op1 = tmp;
16490 op0 = force_reg (op_mode, op0);
16492 if (CONSTANT_P (op1))
16494 int tmp = standard_80387_constant_p (op1);
16496 op1 = validize_mem (force_const_mem (op_mode, op1));
16500 op1 = force_reg (op_mode, op1);
16503 op1 = force_reg (op_mode, op1);
16507 /* Try to rearrange the comparison to make it cheaper. */
16508 if (ix86_fp_comparison_cost (code)
16509 > ix86_fp_comparison_cost (swap_condition (code))
16510 && (REG_P (op1) || can_create_pseudo_p ()))
16513 tmp = op0, op0 = op1, op1 = tmp;
16514 code = swap_condition (code);
16516 op0 = force_reg (op_mode, op0);
16524 /* Convert comparison codes we use to represent FP comparison to integer
16525 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
16529 ix86_fp_compare_code_to_integer (enum rtx_code code)
16558 /* Generate insn patterns to do a floating point compare of OPERANDS. */
16561 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
16563 enum machine_mode fpcmp_mode, intcmp_mode;
16566 fpcmp_mode = ix86_fp_compare_mode (code);
16567 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
16569 /* Do fcomi/sahf based test when profitable. */
16570 switch (ix86_fp_comparison_strategy (code))
16572 case IX86_FPCMP_COMI:
16573 intcmp_mode = fpcmp_mode;
16574 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16575 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16580 case IX86_FPCMP_SAHF:
16581 intcmp_mode = fpcmp_mode;
16582 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16583 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
16587 scratch = gen_reg_rtx (HImode);
16588 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
16589 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16592 case IX86_FPCMP_ARITH:
16593 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16594 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16595 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16597 scratch = gen_reg_rtx (HImode);
16598 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16600 /* In the unordered case, we have to check C2 for NaN's, which
16601 doesn't happen to work out to anything nice combination-wise.
16602 So do some bit twiddling on the value we've got in AH to come
16603 up with an appropriate set of condition codes. */
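/* For reference: after fnstsw the FPU condition bits sit in %ah with
   C0 at 0x01, C2 at 0x04 and C3 at 0x40, so the 0x45 masks below test
   C3|C2|C0 at once.  */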
16605 intcmp_mode = CCNOmode;
16610 if (code == GT || !TARGET_IEEE_FP)
16612 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16617 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16618 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16619 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16620 intcmp_mode = CCmode;
16626 if (code == LT && TARGET_IEEE_FP)
16628 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16629 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16630 intcmp_mode = CCmode;
16635 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16641 if (code == GE || !TARGET_IEEE_FP)
16643 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16648 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16649 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16655 if (code == LE && TARGET_IEEE_FP)
16657 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16658 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16659 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16660 intcmp_mode = CCmode;
16665 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16671 if (code == EQ && TARGET_IEEE_FP)
16673 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16674 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16675 intcmp_mode = CCmode;
16680 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16686 if (code == NE && TARGET_IEEE_FP)
16688 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16689 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16695 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16701 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16705 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16710 gcc_unreachable ();
16718 /* Return the test that should be put into the flags user, i.e.
16719 the bcc, scc, or cmov instruction. */
16720 return gen_rtx_fmt_ee (code, VOIDmode,
16721 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16726 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16730 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16731 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16733 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16735 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16736 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16739 ret = ix86_expand_int_compare (code, op0, op1);
16745 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16747 enum machine_mode mode = GET_MODE (op0);
16759 tmp = ix86_expand_compare (code, op0, op1);
16760 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16761 gen_rtx_LABEL_REF (VOIDmode, label),
16763 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16770 /* Expand DImode branch into multiple compare+branch. */
16772 rtx lo[2], hi[2], label2;
16773 enum rtx_code code1, code2, code3;
16774 enum machine_mode submode;
16776 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16778 tmp = op0, op0 = op1, op1 = tmp;
16779 code = swap_condition (code);
16782 split_double_mode (mode, &op0, 1, lo+0, hi+0);
16783 split_double_mode (mode, &op1, 1, lo+1, hi+1);
16785 submode = mode == DImode ? SImode : DImode;
16787 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16788 avoid two branches. This costs one extra insn, so disable when
16789 optimizing for size. */
16791 if ((code == EQ || code == NE)
16792 && (!optimize_insn_for_size_p ()
16793 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16798 if (hi[1] != const0_rtx)
16799 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16800 NULL_RTX, 0, OPTAB_WIDEN);
16803 if (lo[1] != const0_rtx)
16804 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16805 NULL_RTX, 0, OPTAB_WIDEN);
16807 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16808 NULL_RTX, 0, OPTAB_WIDEN);
16810 ix86_expand_branch (code, tmp, const0_rtx, label);
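/* E.g. a DImode "a == b" on ia32 becomes roughly:
       xorl  hi(b), hi(a)
       xorl  lo(b), lo(a)
       orl   lo(a), hi(a)
       jz    label                                             */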
16814 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16815 op1 is a constant and the low word is zero, then we can just
16816 examine the high word. Similarly for low word -1 and
16817 less-or-equal-than or greater-than. */
16819 if (CONST_INT_P (hi[1]))
16822 case LT: case LTU: case GE: case GEU:
16823 if (lo[1] == const0_rtx)
16825 ix86_expand_branch (code, hi[0], hi[1], label);
16829 case LE: case LEU: case GT: case GTU:
16830 if (lo[1] == constm1_rtx)
16832 ix86_expand_branch (code, hi[0], hi[1], label);
16840 /* Otherwise, we need two or three jumps. */
16842 label2 = gen_label_rtx ();
16845 code2 = swap_condition (code);
16846 code3 = unsigned_condition (code);
16850 case LT: case GT: case LTU: case GTU:
16853 case LE: code1 = LT; code2 = GT; break;
16854 case GE: code1 = GT; code2 = LT; break;
16855 case LEU: code1 = LTU; code2 = GTU; break;
16856 case GEU: code1 = GTU; code2 = LTU; break;
16858 case EQ: code1 = UNKNOWN; code2 = NE; break;
16859 case NE: code2 = UNKNOWN; break;
16862 gcc_unreachable ();
16867 * if (hi(a) < hi(b)) goto true;
16868 * if (hi(a) > hi(b)) goto false;
16869 * if (lo(a) < lo(b)) goto true;
16873 if (code1 != UNKNOWN)
16874 ix86_expand_branch (code1, hi[0], hi[1], label);
16875 if (code2 != UNKNOWN)
16876 ix86_expand_branch (code2, hi[0], hi[1], label2);
16878 ix86_expand_branch (code3, lo[0], lo[1], label);
16880 if (code2 != UNKNOWN)
16881 emit_label (label2);
16886 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16891 /* Split branch based on floating point condition. */
16893 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16894 rtx target1, rtx target2, rtx tmp, rtx pushed)
16899 if (target2 != pc_rtx)
16902 code = reverse_condition_maybe_unordered (code);
16907 condition = ix86_expand_fp_compare (code, op1, op2,
16910 /* Remove pushed operand from stack. */
16912 ix86_free_from_memory (GET_MODE (pushed));
16914 i = emit_jump_insn (gen_rtx_SET
16916 gen_rtx_IF_THEN_ELSE (VOIDmode,
16917 condition, target1, target2)));
16918 if (split_branch_probability >= 0)
16919 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16923 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16927 gcc_assert (GET_MODE (dest) == QImode);
16929 ret = ix86_expand_compare (code, op0, op1);
16930 PUT_MODE (ret, QImode);
16931 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16934 /* Expand comparison setting or clearing carry flag. Return true when
16935 successful and set pop for the operation. */
16937 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16939 enum machine_mode mode =
16940 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16942 /* Do not handle double-mode compares that go through a special path. */
16943 if (mode == (TARGET_64BIT ? TImode : DImode))
16946 if (SCALAR_FLOAT_MODE_P (mode))
16948 rtx compare_op, compare_seq;
16950 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16952 /* Shortcut: the following common codes never translate
16953 into carry flag compares. */
16954 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16955 || code == ORDERED || code == UNORDERED)
16958 /* These comparisons require zero flag; swap operands so they won't. */
16959 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16960 && !TARGET_IEEE_FP)
16965 code = swap_condition (code);
16968 /* Try to expand the comparison and verify that we end up with
16969 carry flag based comparison. This fails to be true only when
16970 we decide to expand the comparison using arithmetic, which is not
16971 a common scenario. */
16973 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16974 compare_seq = get_insns ();
16977 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16978 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16979 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16981 code = GET_CODE (compare_op);
16983 if (code != LTU && code != GEU)
16986 emit_insn (compare_seq);
16991 if (!INTEGRAL_MODE_P (mode))
17000 /* Convert a==0 into (unsigned)a<1. */
17003 if (op1 != const0_rtx)
17006 code = (code == EQ ? LTU : GEU);
17009 /* Convert a>b into b<a or a>=b-1. */
17012 if (CONST_INT_P (op1))
17014 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
17015 /* Bail out on overflow. We still can swap operands but that
17016 would force loading of the constant into a register. */
17017 if (op1 == const0_rtx
17018 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
17020 code = (code == GTU ? GEU : LTU);
17027 code = (code == GTU ? LTU : GEU);
17031 /* Convert a>=0 into (unsigned)a<0x80000000. */
17034 if (mode == DImode || op1 != const0_rtx)
17036 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17037 code = (code == LT ? GEU : LTU);
17041 if (mode == DImode || op1 != constm1_rtx)
17043 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
17044 code = (code == LE ? GEU : LTU);
17050 /* Swapping operands may cause a constant to appear as the first operand. */
17051 if (!nonimmediate_operand (op0, VOIDmode))
17053 if (!can_create_pseudo_p ())
17055 op0 = force_reg (mode, op0);
17057 *pop = ix86_expand_compare (code, op0, op1);
17058 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
17063 ix86_expand_int_movcc (rtx operands[])
17065 enum rtx_code code = GET_CODE (operands[1]), compare_code;
17066 rtx compare_seq, compare_op;
17067 enum machine_mode mode = GET_MODE (operands[0]);
17068 bool sign_bit_compare_p = false;
17069 rtx op0 = XEXP (operands[1], 0);
17070 rtx op1 = XEXP (operands[1], 1);
17073 compare_op = ix86_expand_compare (code, op0, op1);
17074 compare_seq = get_insns ();
17077 compare_code = GET_CODE (compare_op);
17079 if ((op1 == const0_rtx && (code == GE || code == LT))
17080 || (op1 == constm1_rtx && (code == GT || code == LE)))
17081 sign_bit_compare_p = true;
17083 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
17084 HImode insns, we'd be swallowed in word prefix ops. */
17086 if ((mode != HImode || TARGET_FAST_PREFIX)
17087 && (mode != (TARGET_64BIT ? TImode : DImode))
17088 && CONST_INT_P (operands[2])
17089 && CONST_INT_P (operands[3]))
17091 rtx out = operands[0];
17092 HOST_WIDE_INT ct = INTVAL (operands[2]);
17093 HOST_WIDE_INT cf = INTVAL (operands[3]);
17094 HOST_WIDE_INT diff;
17097 /* Sign bit compares are better done using shifts than by using sbb. */
17099 if (sign_bit_compare_p
17100 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17102 /* Detect overlap between destination and compare sources. */
17105 if (!sign_bit_compare_p)
17108 bool fpcmp = false;
17110 compare_code = GET_CODE (compare_op);
17112 flags = XEXP (compare_op, 0);
17114 if (GET_MODE (flags) == CCFPmode
17115 || GET_MODE (flags) == CCFPUmode)
17119 = ix86_fp_compare_code_to_integer (compare_code);
17122 /* To simplify the rest of the code, restrict to the GEU case. */
17123 if (compare_code == LTU)
17125 HOST_WIDE_INT tmp = ct;
17128 compare_code = reverse_condition (compare_code);
17129 code = reverse_condition (code);
17134 PUT_CODE (compare_op,
17135 reverse_condition_maybe_unordered
17136 (GET_CODE (compare_op)));
17138 PUT_CODE (compare_op,
17139 reverse_condition (GET_CODE (compare_op)));
17143 if (reg_overlap_mentioned_p (out, op0)
17144 || reg_overlap_mentioned_p (out, op1))
17145 tmp = gen_reg_rtx (mode);
17147 if (mode == DImode)
17148 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
17150 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
17151 flags, compare_op));
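/* The movdicc/movsicc_0_m1 patterns are the "sbb %reg, %reg" idiom:
   they yield all-ones when the carry flag is set and zero otherwise.  */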
17155 if (code == GT || code == GE)
17156 code = reverse_condition (code);
17159 HOST_WIDE_INT tmp = ct;
17164 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
17177 tmp = expand_simple_binop (mode, PLUS,
17179 copy_rtx (tmp), 1, OPTAB_DIRECT);
17190 tmp = expand_simple_binop (mode, IOR,
17192 copy_rtx (tmp), 1, OPTAB_DIRECT);
17194 else if (diff == -1 && ct)
17204 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17206 tmp = expand_simple_binop (mode, PLUS,
17207 copy_rtx (tmp), GEN_INT (cf),
17208 copy_rtx (tmp), 1, OPTAB_DIRECT);
17216 * andl cf - ct, dest
17226 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
17229 tmp = expand_simple_binop (mode, AND,
17231 gen_int_mode (cf - ct, mode),
17232 copy_rtx (tmp), 1, OPTAB_DIRECT);
17234 tmp = expand_simple_binop (mode, PLUS,
17235 copy_rtx (tmp), GEN_INT (ct),
17236 copy_rtx (tmp), 1, OPTAB_DIRECT);
17239 if (!rtx_equal_p (tmp, out))
17240 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
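/* I.e. starting from tmp == -1/0 as produced by sbb, the general form is
       tmp = (tmp & (cf - ct)) + ct
   with the cheaper inc/or/not variants used when ct or cf is 0 or -1.  */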
17247 enum machine_mode cmp_mode = GET_MODE (op0);
17250 tmp = ct, ct = cf, cf = tmp;
17253 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17255 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17257 /* We may be reversing an unordered compare to a normal compare, which
17258 is not valid in general (we may convert a non-trapping condition
17259 to a trapping one); however, on i386 we currently emit all
17260 comparisons unordered. */
17261 compare_code = reverse_condition_maybe_unordered (compare_code);
17262 code = reverse_condition_maybe_unordered (code);
17266 compare_code = reverse_condition (compare_code);
17267 code = reverse_condition (code);
17271 compare_code = UNKNOWN;
17272 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
17273 && CONST_INT_P (op1))
17275 if (op1 == const0_rtx
17276 && (code == LT || code == GE))
17277 compare_code = code;
17278 else if (op1 == constm1_rtx)
17282 else if (code == GT)
17287 /* Optimize dest = (op0 < 0) ? -1 : cf. */
17288 if (compare_code != UNKNOWN
17289 && GET_MODE (op0) == GET_MODE (out)
17290 && (cf == -1 || ct == -1))
17292 /* If lea code below could be used, only optimize
17293 if it results in a 2 insn sequence. */
17295 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
17296 || diff == 3 || diff == 5 || diff == 9)
17297 || (compare_code == LT && ct == -1)
17298 || (compare_code == GE && cf == -1))
17301 * notl op1 (if necessary)
17309 code = reverse_condition (code);
17312 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17314 out = expand_simple_binop (mode, IOR,
17316 out, 1, OPTAB_DIRECT);
17317 if (out != operands[0])
17318 emit_move_insn (operands[0], out);
17325 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
17326 || diff == 3 || diff == 5 || diff == 9)
17327 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
17329 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
17335 * lea cf(dest*(ct-cf)),dest
17339 * This also catches the degenerate setcc-only case.
17345 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17348 /* On x86_64 the lea instruction operates on Pmode, so we need
17349 to get the arithmetic done in the proper mode to match. */
17351 tmp = copy_rtx (out);
17355 out1 = copy_rtx (out);
17356 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
17360 tmp = gen_rtx_PLUS (mode, tmp, out1);
17366 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
17369 if (!rtx_equal_p (tmp, out))
17372 out = force_operand (tmp, copy_rtx (out));
17374 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
17376 if (!rtx_equal_p (out, operands[0]))
17377 emit_move_insn (operands[0], copy_rtx (out));
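/* E.g. for dest = (a < b) ? 7 : 3 the setcc leaves 0/1 in dest, and
   "leal 3(,%dest,4), %dest" maps that to 3 or 7 in a single insn.  */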
17383 * General case: Jumpful:
17384 * xorl dest,dest cmpl op1, op2
17385 * cmpl op1, op2 movl ct, dest
17386 * setcc dest jcc 1f
17387 * decl dest movl cf, dest
17388 * andl (cf-ct),dest 1:
17391 * Size 20. Size 14.
17393 * This is reasonably steep, but branch mispredict costs are
17394 * high on modern cpus, so consider failing only if optimizing for space. */
17398 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17399 && BRANCH_COST (optimize_insn_for_speed_p (),
17404 enum machine_mode cmp_mode = GET_MODE (op0);
17409 if (SCALAR_FLOAT_MODE_P (cmp_mode))
17411 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
17413 /* We may be reversing an unordered compare to a normal compare,
17414 which is not valid in general (we may convert a non-trapping
17415 condition to a trapping one); however, on i386 we currently
17416 emit all comparisons unordered. */
17417 code = reverse_condition_maybe_unordered (code);
17421 code = reverse_condition (code);
17422 if (compare_code != UNKNOWN)
17423 compare_code = reverse_condition (compare_code);
17427 if (compare_code != UNKNOWN)
17429 /* notl op1 (if needed)
17434 For x < 0 (resp. x <= -1) there will be no notl,
17435 so if possible swap the constants to get rid of the complement.
17437 True/false will be -1/0 while code below (store flag
17438 followed by decrement) is 0/-1, so the constants need
17439 to be exchanged once more. */
17441 if (compare_code == GE || !cf)
17443 code = reverse_condition (code);
17448 HOST_WIDE_INT tmp = cf;
17453 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
17457 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
17459 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
17461 copy_rtx (out), 1, OPTAB_DIRECT);
17464 out = expand_simple_binop (mode, AND, copy_rtx (out),
17465 gen_int_mode (cf - ct, mode),
17466 copy_rtx (out), 1, OPTAB_DIRECT);
17468 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
17469 copy_rtx (out), 1, OPTAB_DIRECT);
17470 if (!rtx_equal_p (out, operands[0]))
17471 emit_move_insn (operands[0], copy_rtx (out));
17477 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
17479 /* Try a few more things with specific constants and a variable. */
17482 rtx var, orig_out, out, tmp;
17484 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
17487 /* If one of the two operands is an interesting constant, load a
17488 constant with the above and mask it in with a logical operation. */
17490 if (CONST_INT_P (operands[2]))
17493 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
17494 operands[3] = constm1_rtx, op = and_optab;
17495 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
17496 operands[3] = const0_rtx, op = ior_optab;
17500 else if (CONST_INT_P (operands[3]))
17503 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
17504 operands[2] = constm1_rtx, op = and_optab;
17505 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
17506 operands[2] = const0_rtx, op = ior_optab;
17513 orig_out = operands[0];
17514 tmp = gen_reg_rtx (mode);
17517 /* Recurse to get the constant loaded. */
17518 if (ix86_expand_int_movcc (operands) == 0)
17521 /* Mask in the interesting variable. */
17522 out = expand_binop (mode, op, var, tmp, orig_out, 0,
17524 if (!rtx_equal_p (out, orig_out))
17525 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
17531 * For comparison with above,
17541 if (! nonimmediate_operand (operands[2], mode))
17542 operands[2] = force_reg (mode, operands[2]);
17543 if (! nonimmediate_operand (operands[3], mode))
17544 operands[3] = force_reg (mode, operands[3]);
17546 if (! register_operand (operands[2], VOIDmode)
17548 || ! register_operand (operands[3], VOIDmode)))
17549 operands[2] = force_reg (mode, operands[2]);
17552 && ! register_operand (operands[3], VOIDmode))
17553 operands[3] = force_reg (mode, operands[3]);
17555 emit_insn (compare_seq);
17556 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17557 gen_rtx_IF_THEN_ELSE (mode,
17558 compare_op, operands[2],
17563 /* Swap, force into registers, or otherwise massage the two operands
17564 to an sse comparison with a mask result. Thus we differ a bit from
17565 ix86_prepare_fp_compare_args which expects to produce a flags result.
17567 The DEST operand exists to help determine whether to commute commutative
17568 operators. The POP0/POP1 operands are updated in place. The new
17569 comparison code is returned, or UNKNOWN if not implementable. */
17571 static enum rtx_code
17572 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
17573 rtx *pop0, rtx *pop1)
17581 /* We have no LTGT as an operator. We could implement it with
17582 NE & ORDERED, but this requires an extra temporary. It's
17583 not clear that it's worth it. */
17590 /* These are supported directly. */
17597 /* For commutative operators, try to canonicalize the destination
17598 operand to be first in the comparison - this helps reload to
17599 avoid extra moves. */
17600 if (!dest || !rtx_equal_p (dest, *pop1))
17608 /* These are not supported directly. Swap the comparison operands
17609 to transform into something that is supported. */
17613 code = swap_condition (code);
17617 gcc_unreachable ();
17623 /* Detect conditional moves that exactly match min/max operational
17624 semantics. Note that this is IEEE safe, as long as we don't
17625 interchange the operands.
17627 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17628 and TRUE if the operation is successful and instructions are emitted. */
17631 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17632 rtx cmp_op1, rtx if_true, rtx if_false)
17634 enum machine_mode mode;
17640 else if (code == UNGE)
17643 if_true = if_false;
17649 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17651 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17656 mode = GET_MODE (dest);
17658 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17659 but MODE may be a vector mode and thus not appropriate. */
17660 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17662 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17665 if_true = force_reg (mode, if_true);
17666 v = gen_rtvec (2, if_true, if_false);
17667 tmp = gen_rtx_UNSPEC (mode, v, u);
17671 code = is_min ? SMIN : SMAX;
17672 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17675 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
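/* E.g. "x < y ? x : y" in SFmode maps directly onto minss, whose hardware
   semantics are exactly "src1 < src2 ? src1 : src2", so even the NaN and
   -0.0 cases line up as long as the operand order is preserved.  */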
17679 /* Expand an sse vector comparison. Return the register with the result. */
17682 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17683 rtx op_true, rtx op_false)
17685 enum machine_mode mode = GET_MODE (dest);
17688 cmp_op0 = force_reg (mode, cmp_op0);
17689 if (!nonimmediate_operand (cmp_op1, mode))
17690 cmp_op1 = force_reg (mode, cmp_op1);
17693 || reg_overlap_mentioned_p (dest, op_true)
17694 || reg_overlap_mentioned_p (dest, op_false))
17695 dest = gen_reg_rtx (mode);
17697 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17698 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17703 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17704 operations. This is used for both scalar and vector conditional moves. */
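/* In C-like terms the general case below computes

	t2   = cmp & op_true;
	t3   = ~cmp & op_false;
	dest = t3 | t2;

   where CMP is an all-ones/all-zeros mask per element; the special
   cases above simply drop one AND when the corresponding arm is
   zero.  */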
17707 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17709 enum machine_mode mode = GET_MODE (dest);
17712 if (op_false == CONST0_RTX (mode))
17714 op_true = force_reg (mode, op_true);
17715 x = gen_rtx_AND (mode, cmp, op_true);
17716 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17718 else if (op_true == CONST0_RTX (mode))
17720 op_false = force_reg (mode, op_false);
17721 x = gen_rtx_NOT (mode, cmp);
17722 x = gen_rtx_AND (mode, x, op_false);
17723 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17725 else if (TARGET_XOP)
17727 rtx pcmov = gen_rtx_SET (mode, dest,
17728 gen_rtx_IF_THEN_ELSE (mode, cmp,
17735 op_true = force_reg (mode, op_true);
17736 op_false = force_reg (mode, op_false);
17738 t2 = gen_reg_rtx (mode);
17740 t3 = gen_reg_rtx (mode);
17744 x = gen_rtx_AND (mode, op_true, cmp);
17745 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17747 x = gen_rtx_NOT (mode, cmp);
17748 x = gen_rtx_AND (mode, x, op_false);
17749 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17751 x = gen_rtx_IOR (mode, t3, t2);
17752 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17756 /* Expand a floating-point conditional move. Return true if successful. */
17759 ix86_expand_fp_movcc (rtx operands[])
17761 enum machine_mode mode = GET_MODE (operands[0]);
17762 enum rtx_code code = GET_CODE (operands[1]);
17763 rtx tmp, compare_op;
17764 rtx op0 = XEXP (operands[1], 0);
17765 rtx op1 = XEXP (operands[1], 1);
17767 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17769 enum machine_mode cmode;
17771 /* Since we have no cmove for SSE registers, don't force bad register
17772 allocation just to gain access to it. Deny movcc when the
17773 comparison mode doesn't match the move mode. */
17774 cmode = GET_MODE (op0);
17775 if (cmode == VOIDmode)
17776 cmode = GET_MODE (op1);
17780 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17781 if (code == UNKNOWN)
17784 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17785 operands[2], operands[3]))
17788 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17789 operands[2], operands[3]);
17790 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17794 /* The floating point conditional move instructions don't directly
17795 support conditions resulting from a signed integer comparison. */
17797 compare_op = ix86_expand_compare (code, op0, op1);
17798 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17800 tmp = gen_reg_rtx (QImode);
17801 ix86_expand_setcc (tmp, code, op0, op1);
17803 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17806 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17807 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17808 operands[2], operands[3])));
17813 /* Expand a floating-point vector conditional move; a vcond operation
17814 rather than a movcc operation. */
17817 ix86_expand_fp_vcond (rtx operands[])
17819 enum rtx_code code = GET_CODE (operands[3]);
17822 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17823 &operands[4], &operands[5]);
17824 if (code == UNKNOWN)
17827 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17828 operands[5], operands[1], operands[2]))
17831 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17832 operands[1], operands[2]);
17833 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17837 /* Expand a signed/unsigned integral vector conditional move. */
17840 ix86_expand_int_vcond (rtx operands[])
17842 enum machine_mode mode = GET_MODE (operands[0]);
17843 enum rtx_code code = GET_CODE (operands[3]);
17844 bool negate = false;
17847 cop0 = operands[4];
17848 cop1 = operands[5];
17850 /* XOP supports all of the comparisons on all vector int types. */
17853 /* Canonicalize the comparison to EQ, GT, GTU. */
17864 code = reverse_condition (code);
17870 code = reverse_condition (code);
17876 code = swap_condition (code);
17877 x = cop0, cop0 = cop1, cop1 = x;
17881 gcc_unreachable ();
17884 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17885 if (mode == V2DImode)
17890 /* SSE4.1 supports EQ. */
17891 if (!TARGET_SSE4_1)
17897 /* SSE4.2 supports GT/GTU. */
17898 if (!TARGET_SSE4_2)
17903 gcc_unreachable ();
17907 /* Unsigned parallel compare is not supported by the hardware.
17908 Play some tricks to turn this into a signed comparison. */
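/* As a C sketch of the trick (shown for 32-bit elements): flipping the
   sign bit of both operands converts unsigned order into signed order,

	a >u b   <==>   (a ^ 0x80000000) >s (b ^ 0x80000000)

   and subtracting the sign-bit mask below has the same effect as the
   xor, since the carry out of bit 31 is discarded.  */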
17912 cop0 = force_reg (mode, cop0);
17920 rtx (*gen_sub3) (rtx, rtx, rtx);
17922 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17924 mask = ix86_build_signbit_mask (mode, true, false);
17925 gen_sub3 = (mode == V4SImode
17926 ? gen_subv4si3 : gen_subv2di3);
17927 t1 = gen_reg_rtx (mode);
17928 emit_insn (gen_sub3 (t1, cop0, mask));
17930 t2 = gen_reg_rtx (mode);
17931 emit_insn (gen_sub3 (t2, cop1, mask));
17941 /* Perform a parallel unsigned saturating subtraction. */
17942 x = gen_reg_rtx (mode);
17943 emit_insn (gen_rtx_SET (VOIDmode, x,
17944 gen_rtx_US_MINUS (mode, cop0, cop1)));
17947 cop1 = CONST0_RTX (mode);
17953 gcc_unreachable ();
17958 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17959 operands[1+negate], operands[2-negate]);
17961 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17962 operands[2-negate]);
17966 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17967 true if we should do zero extension, else sign extension. HIGH_P is
17968 true if we want the N/2 high elements, else the low elements. */
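/* Sketch of the approach used below: the inserted "high half" of each
   widened element is either a zero vector (zero extension) or the mask
   (0 > x), which is all ones exactly in the elements whose sign bit is
   set - the correct sign-extension bits.  */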
17971 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17973 enum machine_mode imode = GET_MODE (operands[1]);
17974 rtx (*unpack)(rtx, rtx, rtx);
17981 unpack = gen_vec_interleave_highv16qi;
17983 unpack = gen_vec_interleave_lowv16qi;
17987 unpack = gen_vec_interleave_highv8hi;
17989 unpack = gen_vec_interleave_lowv8hi;
17993 unpack = gen_vec_interleave_highv4si;
17995 unpack = gen_vec_interleave_lowv4si;
17998 gcc_unreachable ();
18001 dest = gen_lowpart (imode, operands[0]);
18004 se = force_reg (imode, CONST0_RTX (imode));
18006 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
18007 operands[1], pc_rtx, pc_rtx);
18009 emit_insn (unpack (dest, operands[1], se));
18012 /* This function performs the same task as ix86_expand_sse_unpack,
18013 but with SSE4.1 instructions. */
18016 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
18018 enum machine_mode imode = GET_MODE (operands[1]);
18019 rtx (*unpack)(rtx, rtx);
18026 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
18028 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
18032 unpack = gen_sse4_1_zero_extendv4hiv4si2;
18034 unpack = gen_sse4_1_sign_extendv4hiv4si2;
18038 unpack = gen_sse4_1_zero_extendv2siv2di2;
18040 unpack = gen_sse4_1_sign_extendv2siv2di2;
18043 gcc_unreachable ();
18046 dest = operands[0];
18049 /* Shift higher 8 bytes to lower 8 bytes. */
18050 src = gen_reg_rtx (imode);
18051 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
18052 gen_lowpart (V1TImode, operands[1]),
18058 emit_insn (unpack (dest, src));
18061 /* Expand conditional increment or decrement using adc/sbb instructions.
18062 The default case using setcc followed by the conditional move can be
18063 done by generic code. */
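/* Example of the transformation (illustrative): with unsigned
   operands,

	x += (a < b);

   can be emitted as a compare, which leaves a < b in the carry flag,
   followed by an add-with-carry of zero (a cmp/adc pair); the
   decrement variant uses sbb the same way.  */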
18065 ix86_expand_int_addcc (rtx operands[])
18067 enum rtx_code code = GET_CODE (operands[1]);
18069 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
18071 rtx val = const0_rtx;
18072 bool fpcmp = false;
18073 enum machine_mode mode;
18074 rtx op0 = XEXP (operands[1], 0);
18075 rtx op1 = XEXP (operands[1], 1);
18077 if (operands[3] != const1_rtx
18078 && operands[3] != constm1_rtx)
18080 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
18082 code = GET_CODE (compare_op);
18084 flags = XEXP (compare_op, 0);
18086 if (GET_MODE (flags) == CCFPmode
18087 || GET_MODE (flags) == CCFPUmode)
18090 code = ix86_fp_compare_code_to_integer (code);
18097 PUT_CODE (compare_op,
18098 reverse_condition_maybe_unordered
18099 (GET_CODE (compare_op)));
18101 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
18104 mode = GET_MODE (operands[0]);
18106 /* Construct either adc or sbb insn. */
18107 if ((code == LTU) == (operands[3] == constm1_rtx))
18112 insn = gen_subqi3_carry;
18115 insn = gen_subhi3_carry;
18118 insn = gen_subsi3_carry;
18121 insn = gen_subdi3_carry;
18124 gcc_unreachable ();
18132 insn = gen_addqi3_carry;
18135 insn = gen_addhi3_carry;
18138 insn = gen_addsi3_carry;
18141 insn = gen_adddi3_carry;
18144 gcc_unreachable ();
18147 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
18153 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
18154 but works for floating point parameters and nonoffsettable memories.
18155 For pushes, it returns just stack offsets; the values will be saved
18156 in the right order. Maximally four parts are generated. */
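/* Example: on a 32-bit target a DFmode operand is returned as two
   SImode parts, the low word in PARTS[0] and the high word in
   PARTS[1], and XFmode as three; on a 64-bit target TFmode splits
   into two DImode parts.  */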
18159 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
18164 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
18166 size = (GET_MODE_SIZE (mode) + 4) / 8;
18168 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
18169 gcc_assert (size >= 2 && size <= 4);
18171 /* Optimize constant pool references to immediates. This is used by fp
18172 moves, which force all constants to memory to allow combining. */
18173 if (MEM_P (operand) && MEM_READONLY_P (operand))
18175 rtx tmp = maybe_get_pool_constant (operand);
18180 if (MEM_P (operand) && !offsettable_memref_p (operand))
18182 /* The only non-offsettable memories we handle are pushes. */
18183 int ok = push_operand (operand, VOIDmode);
18187 operand = copy_rtx (operand);
18188 PUT_MODE (operand, Pmode);
18189 parts[0] = parts[1] = parts[2] = parts[3] = operand;
18193 if (GET_CODE (operand) == CONST_VECTOR)
18195 enum machine_mode imode = int_mode_for_mode (mode);
18196 /* Caution: if we looked through a constant pool memory above,
18197 the operand may actually have a different mode now. That's
18198 ok, since we want to pun this all the way back to an integer. */
18199 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
18200 gcc_assert (operand != NULL);
18206 if (mode == DImode)
18207 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18212 if (REG_P (operand))
18214 gcc_assert (reload_completed);
18215 for (i = 0; i < size; i++)
18216 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
18218 else if (offsettable_memref_p (operand))
18220 operand = adjust_address (operand, SImode, 0);
18221 parts[0] = operand;
18222 for (i = 1; i < size; i++)
18223 parts[i] = adjust_address (operand, SImode, 4 * i);
18225 else if (GET_CODE (operand) == CONST_DOUBLE)
18230 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18234 real_to_target (l, &r, mode);
18235 parts[3] = gen_int_mode (l[3], SImode);
18236 parts[2] = gen_int_mode (l[2], SImode);
18239 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
18240 parts[2] = gen_int_mode (l[2], SImode);
18243 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
18246 gcc_unreachable ();
18248 parts[1] = gen_int_mode (l[1], SImode);
18249 parts[0] = gen_int_mode (l[0], SImode);
18252 gcc_unreachable ();
18257 if (mode == TImode)
18258 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
18259 if (mode == XFmode || mode == TFmode)
18261 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
18262 if (REG_P (operand))
18264 gcc_assert (reload_completed);
18265 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
18266 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
18268 else if (offsettable_memref_p (operand))
18270 operand = adjust_address (operand, DImode, 0);
18271 parts[0] = operand;
18272 parts[1] = adjust_address (operand, upper_mode, 8);
18274 else if (GET_CODE (operand) == CONST_DOUBLE)
18279 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
18280 real_to_target (l, &r, mode);
18282 /* Do not use a shift by 32, to avoid a warning on 32-bit systems. */
18283 if (HOST_BITS_PER_WIDE_INT >= 64)
18286 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
18287 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
18290 parts[0] = immed_double_const (l[0], l[1], DImode);
18292 if (upper_mode == SImode)
18293 parts[1] = gen_int_mode (l[2], SImode);
18294 else if (HOST_BITS_PER_WIDE_INT >= 64)
18297 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
18298 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
18301 parts[1] = immed_double_const (l[2], l[3], DImode);
18304 gcc_unreachable ();
18311 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
18312 Return false when normal moves are needed; true when all required
18313 insns have been emitted. Operands 2-5 contain the output values
18314 in the correct order; operands 6-9 contain the input values. */
18317 ix86_split_long_move (rtx operands[])
18322 int collisions = 0;
18323 enum machine_mode mode = GET_MODE (operands[0]);
18324 bool collisionparts[4];
18326 /* The DFmode expanders may ask us to move a double.
18327 For a 64-bit target this is a single move. By hiding that fact
18328 here we simplify the i386.md splitters. */
18329 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
18331 /* Optimize constant pool references to immediates. This is used by
18332 fp moves, which force all constants to memory to allow combining. */
18334 if (MEM_P (operands[1])
18335 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
18336 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
18337 operands[1] = get_pool_constant (XEXP (operands[1], 0));
18338 if (push_operand (operands[0], VOIDmode))
18340 operands[0] = copy_rtx (operands[0]);
18341 PUT_MODE (operands[0], Pmode);
18344 operands[0] = gen_lowpart (DImode, operands[0]);
18345 operands[1] = gen_lowpart (DImode, operands[1]);
18346 emit_move_insn (operands[0], operands[1]);
18350 /* The only non-offsettable memory we handle is a push. */
18351 if (push_operand (operands[0], VOIDmode))
18354 gcc_assert (!MEM_P (operands[0])
18355 || offsettable_memref_p (operands[0]));
18357 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
18358 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
18360 /* When emitting a push, take care with source operands on the stack. */
18361 if (push && MEM_P (operands[1])
18362 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
18364 rtx src_base = XEXP (part[1][nparts - 1], 0);
18366 /* Compensate for the stack decrement by 4. */
18367 if (!TARGET_64BIT && nparts == 3
18368 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
18369 src_base = plus_constant (src_base, 4);
18371 /* src_base refers to the stack pointer and is
18372 automatically decreased by the emitted pushes. */
18373 for (i = 0; i < nparts; i++)
18374 part[1][i] = change_address (part[1][i],
18375 GET_MODE (part[1][i]), src_base);
18378 /* We need to do the copy in the right order in case an address register
18379 of the source overlaps the destination. */
18380 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
18384 for (i = 0; i < nparts; i++)
18387 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
18388 if (collisionparts[i])
18392 /* Collision in the middle part can be handled by reordering. */
18393 if (collisions == 1 && nparts == 3 && collisionparts [1])
18395 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18396 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18398 else if (collisions == 1
18400 && (collisionparts [1] || collisionparts [2]))
18402 if (collisionparts [1])
18404 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
18405 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
18409 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
18410 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
18414 /* If there are more collisions, we can't handle them by reordering.
18415 Do an lea to the last part and use only one colliding move. */
18416 else if (collisions > 1)
18422 base = part[0][nparts - 1];
18424 /* Handle the case when the last part isn't valid for lea.
18425 This happens in 64-bit mode when storing the 12-byte XFmode. */
18426 if (GET_MODE (base) != Pmode)
18427 base = gen_rtx_REG (Pmode, REGNO (base));
18429 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
18430 part[1][0] = replace_equiv_address (part[1][0], base);
18431 for (i = 1; i < nparts; i++)
18433 tmp = plus_constant (base, UNITS_PER_WORD * i);
18434 part[1][i] = replace_equiv_address (part[1][i], tmp);
18445 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
18446 emit_insn (gen_addsi3 (stack_pointer_rtx,
18447 stack_pointer_rtx, GEN_INT (-4)));
18448 emit_move_insn (part[0][2], part[1][2]);
18450 else if (nparts == 4)
18452 emit_move_insn (part[0][3], part[1][3]);
18453 emit_move_insn (part[0][2], part[1][2]);
18458 /* In 64-bit mode we don't have a 32-bit push available. If this is a
18459 register, that is OK - we just use the larger counterpart. We also
18460 retype memory - this comes from an attempt to avoid a REX prefix on
18461 moves of the second half of a TFmode value. */
18462 if (GET_MODE (part[1][1]) == SImode)
18464 switch (GET_CODE (part[1][1]))
18467 part[1][1] = adjust_address (part[1][1], DImode, 0);
18471 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
18475 gcc_unreachable ();
18478 if (GET_MODE (part[1][0]) == SImode)
18479 part[1][0] = part[1][1];
18482 emit_move_insn (part[0][1], part[1][1]);
18483 emit_move_insn (part[0][0], part[1][0]);
18487 /* Choose the correct order so as not to overwrite the source before it is copied. */
18488 if ((REG_P (part[0][0])
18489 && REG_P (part[1][1])
18490 && (REGNO (part[0][0]) == REGNO (part[1][1])
18492 && REGNO (part[0][0]) == REGNO (part[1][2]))
18494 && REGNO (part[0][0]) == REGNO (part[1][3]))))
18496 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
18498 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
18500 operands[2 + i] = part[0][j];
18501 operands[6 + i] = part[1][j];
18506 for (i = 0; i < nparts; i++)
18508 operands[2 + i] = part[0][i];
18509 operands[6 + i] = part[1][i];
18513 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
18514 if (optimize_insn_for_size_p ())
18516 for (j = 0; j < nparts - 1; j++)
18517 if (CONST_INT_P (operands[6 + j])
18518 && operands[6 + j] != const0_rtx
18519 && REG_P (operands[2 + j]))
18520 for (i = j; i < nparts - 1; i++)
18521 if (CONST_INT_P (operands[7 + i])
18522 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
18523 operands[7 + i] = operands[2 + j];
18526 for (i = 0; i < nparts; i++)
18527 emit_move_insn (operands[2 + i], operands[6 + i]);
18532 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
18533 left shift by a constant, either using a single shift or
18534 a sequence of add instructions. */
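/* Example: when the add sequence is cheaper than a shift by constant,

	x <<= 2;

   is emitted as two self-adds (x += x; x += x;), each of which doubles
   the value; larger counts fall through to a single shift insn.  */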
18537 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
18539 rtx (*insn)(rtx, rtx, rtx);
18542 || (count * ix86_cost->add <= ix86_cost->shift_const
18543 && !optimize_insn_for_size_p ()))
18545 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
18546 while (count-- > 0)
18547 emit_insn (insn (operand, operand, operand));
18551 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18552 emit_insn (insn (operand, operand, GEN_INT (count)));
18557 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
18559 rtx (*gen_ashl3)(rtx, rtx, rtx);
18560 rtx (*gen_shld)(rtx, rtx, rtx);
18561 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18563 rtx low[2], high[2];
18566 if (CONST_INT_P (operands[2]))
18568 split_double_mode (mode, operands, 2, low, high);
18569 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18571 if (count >= half_width)
18573 emit_move_insn (high[0], low[1]);
18574 emit_move_insn (low[0], const0_rtx);
18576 if (count > half_width)
18577 ix86_expand_ashl_const (high[0], count - half_width, mode);
18581 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18583 if (!rtx_equal_p (operands[0], operands[1]))
18584 emit_move_insn (operands[0], operands[1]);
18586 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
18587 ix86_expand_ashl_const (low[0], count, mode);
18592 split_double_mode (mode, operands, 1, low, high);
18594 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
18596 if (operands[1] == const1_rtx)
18598 /* Assuming we've chosen QImode-capable registers, 1 << N
18599 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18600 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18602 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18604 ix86_expand_clear (low[0]);
18605 ix86_expand_clear (high[0]);
18606 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
18608 d = gen_lowpart (QImode, low[0]);
18609 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18610 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18611 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18613 d = gen_lowpart (QImode, high[0]);
18614 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18615 s = gen_rtx_NE (QImode, flags, const0_rtx);
18616 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18619 /* Otherwise, we can get the same results by manually performing
18620 a bit extract operation on bit 5/6, and then performing the two
18621 shifts. The two methods of getting 0/1 into low/high are exactly
18622 the same size. Avoiding the shift in the bit extract case helps
18623 pentium4 a bit; no one else seems to care much either way. */
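/* As a C sketch for the DImode-on-32-bit case (assuming the tested bit
   is bit 5, distinguishing shift counts >= 32):

	high = (count >> 5) & 1;
	low  = high ^ 1;
	low <<= count;
	high <<= count;

   the hardware masks the shift count to 0..31, so after the two shifts
   exactly one of LOW/HIGH holds the single set bit.  */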
18626 enum machine_mode half_mode;
18627 rtx (*gen_lshr3)(rtx, rtx, rtx);
18628 rtx (*gen_and3)(rtx, rtx, rtx);
18629 rtx (*gen_xor3)(rtx, rtx, rtx);
18630 HOST_WIDE_INT bits;
18633 if (mode == DImode)
18635 half_mode = SImode;
18636 gen_lshr3 = gen_lshrsi3;
18637 gen_and3 = gen_andsi3;
18638 gen_xor3 = gen_xorsi3;
18643 half_mode = DImode;
18644 gen_lshr3 = gen_lshrdi3;
18645 gen_and3 = gen_anddi3;
18646 gen_xor3 = gen_xordi3;
18650 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18651 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
18653 x = gen_lowpart (half_mode, operands[2]);
18654 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18656 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
18657 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
18658 emit_move_insn (low[0], high[0]);
18659 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
18662 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18663 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
18667 if (operands[1] == constm1_rtx)
18669 /* For -1 << N, we can avoid the shld instruction, because we
18670 know that we're shifting 0...31/63 ones into a -1. */
18671 emit_move_insn (low[0], constm1_rtx);
18672 if (optimize_insn_for_size_p ())
18673 emit_move_insn (high[0], low[0]);
18675 emit_move_insn (high[0], constm1_rtx);
18679 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
18681 if (!rtx_equal_p (operands[0], operands[1]))
18682 emit_move_insn (operands[0], operands[1]);
18684 split_double_mode (mode, operands, 1, low, high);
18685 emit_insn (gen_shld (high[0], low[0], operands[2]));
18688 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
18690 if (TARGET_CMOVE && scratch)
18692 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18693 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18695 ix86_expand_clear (scratch);
18696 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
18700 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18701 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18703 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
18708 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18710 rtx (*gen_ashr3)(rtx, rtx, rtx)
18711 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
18712 rtx (*gen_shrd)(rtx, rtx, rtx);
18713 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18715 rtx low[2], high[2];
18718 if (CONST_INT_P (operands[2]))
18720 split_double_mode (mode, operands, 2, low, high);
18721 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18723 if (count == GET_MODE_BITSIZE (mode) - 1)
18725 emit_move_insn (high[0], high[1]);
18726 emit_insn (gen_ashr3 (high[0], high[0],
18727 GEN_INT (half_width - 1)));
18728 emit_move_insn (low[0], high[0]);
18731 else if (count >= half_width)
18733 emit_move_insn (low[0], high[1]);
18734 emit_move_insn (high[0], low[0]);
18735 emit_insn (gen_ashr3 (high[0], high[0],
18736 GEN_INT (half_width - 1)));
18738 if (count > half_width)
18739 emit_insn (gen_ashr3 (low[0], low[0],
18740 GEN_INT (count - half_width)));
18744 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18746 if (!rtx_equal_p (operands[0], operands[1]))
18747 emit_move_insn (operands[0], operands[1]);
18749 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18750 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
18755 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18757 if (!rtx_equal_p (operands[0], operands[1]))
18758 emit_move_insn (operands[0], operands[1]);
18760 split_double_mode (mode, operands, 1, low, high);
18762 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18763 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
18765 if (TARGET_CMOVE && scratch)
18767 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18768 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18770 emit_move_insn (scratch, high[0]);
18771 emit_insn (gen_ashr3 (scratch, scratch,
18772 GEN_INT (half_width - 1)));
18773 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18778 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
18779 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
18781 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
18787 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18789 rtx (*gen_lshr3)(rtx, rtx, rtx)
18790 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
18791 rtx (*gen_shrd)(rtx, rtx, rtx);
18792 int half_width = GET_MODE_BITSIZE (mode) >> 1;
18794 rtx low[2], high[2];
18797 if (CONST_INT_P (operands[2]))
18799 split_double_mode (mode, operands, 2, low, high);
18800 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
18802 if (count >= half_width)
18804 emit_move_insn (low[0], high[1]);
18805 ix86_expand_clear (high[0]);
18807 if (count > half_width)
18808 emit_insn (gen_lshr3 (low[0], low[0],
18809 GEN_INT (count - half_width)));
18813 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18815 if (!rtx_equal_p (operands[0], operands[1]))
18816 emit_move_insn (operands[0], operands[1]);
18818 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
18819 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
18824 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
18826 if (!rtx_equal_p (operands[0], operands[1]))
18827 emit_move_insn (operands[0], operands[1]);
18829 split_double_mode (mode, operands, 1, low, high);
18831 emit_insn (gen_shrd (low[0], high[0], operands[2]));
18832 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
18834 if (TARGET_CMOVE && scratch)
18836 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
18837 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
18839 ix86_expand_clear (scratch);
18840 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
18845 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
18846 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
18848 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
18853 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18855 predict_jump (int prob)
18857 rtx insn = get_last_insn ();
18858 gcc_assert (JUMP_P (insn));
18859 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
18862 /* Helper function for the string operations below. Test whether VARIABLE
18863 is aligned to VALUE bytes. If it is, jump to the returned label. */
18865 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18867 rtx label = gen_label_rtx ();
18868 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18869 if (GET_MODE (variable) == DImode)
18870 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18872 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18873 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18876 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18878 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18882 /* Adjust COUNTREG by VALUE. */
18884 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18886 rtx (*gen_add)(rtx, rtx, rtx)
18887 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
18889 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
18892 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18894 ix86_zero_extend_to_Pmode (rtx exp)
18897 if (GET_MODE (exp) == VOIDmode)
18898 return force_reg (Pmode, exp);
18899 if (GET_MODE (exp) == Pmode)
18900 return copy_to_mode_reg (Pmode, exp);
18901 r = gen_reg_rtx (Pmode);
18902 emit_insn (gen_zero_extendsidi2 (r, exp));
18906 /* Divide COUNTREG by SCALE. */
18908 scale_counter (rtx countreg, int scale)
18914 if (CONST_INT_P (countreg))
18915 return GEN_INT (INTVAL (countreg) / scale);
18916 gcc_assert (REG_P (countreg));
18918 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18919 GEN_INT (exact_log2 (scale)),
18920 NULL, 1, OPTAB_DIRECT);
18924 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18925 DImode for constant loop counts. */
18927 static enum machine_mode
18928 counter_mode (rtx count_exp)
18930 if (GET_MODE (count_exp) != VOIDmode)
18931 return GET_MODE (count_exp);
18932 if (!CONST_INT_P (count_exp))
18934 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18939 /* When SRCPTR is non-NULL, output a simple loop to move memory
18940 from SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
18941 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
18942 equivalent loop to set memory to VALUE (supposed to be in MODE).
18944 The size is rounded down to a whole number of chunks moved at once.
18945 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
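/* Shape of the generated code, as a C sketch for a copy with
   UNROLL == 2 and chunk size S (names illustrative):

	size = count & ~(2 * S - 1);
	if (size == 0)
	  goto out;
	iter = 0;
	do
	  {
	    copy S bytes at dest + iter from src + iter;
	    copy S bytes at dest + iter + S from src + iter + S;
	    iter += 2 * S;
	  }
	while (iter < size);
	dest += iter;  src += iter;
      out:;
   */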
18949 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18950 rtx destptr, rtx srcptr, rtx value,
18951 rtx count, enum machine_mode mode, int unroll,
18954 rtx out_label, top_label, iter, tmp;
18955 enum machine_mode iter_mode = counter_mode (count);
18956 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18957 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18963 top_label = gen_label_rtx ();
18964 out_label = gen_label_rtx ();
18965 iter = gen_reg_rtx (iter_mode);
18967 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18968 NULL, 1, OPTAB_DIRECT);
18969 /* Those two should combine. */
18970 if (piece_size == const1_rtx)
18972 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18974 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18976 emit_move_insn (iter, const0_rtx);
18978 emit_label (top_label);
18980 tmp = convert_modes (Pmode, iter_mode, iter, true);
18981 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18982 destmem = change_address (destmem, mode, x_addr);
18986 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18987 srcmem = change_address (srcmem, mode, y_addr);
18989 /* When unrolling for chips that reorder memory reads and writes,
18990 we can save registers by using a single temporary.
18991 Also, using 4 temporaries is overkill in 32-bit mode. */
18992 if (!TARGET_64BIT && 0)
18994 for (i = 0; i < unroll; i++)
18999 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19001 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19003 emit_move_insn (destmem, srcmem);
19009 gcc_assert (unroll <= 4);
19010 for (i = 0; i < unroll; i++)
19012 tmpreg[i] = gen_reg_rtx (mode);
19016 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
19018 emit_move_insn (tmpreg[i], srcmem);
19020 for (i = 0; i < unroll; i++)
19025 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19027 emit_move_insn (destmem, tmpreg[i]);
19032 for (i = 0; i < unroll; i++)
19036 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
19037 emit_move_insn (destmem, value);
19040 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
19041 true, OPTAB_LIB_WIDEN);
19043 emit_move_insn (iter, tmp);
19045 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
19047 if (expected_size != -1)
19049 expected_size /= GET_MODE_SIZE (mode) * unroll;
19050 if (expected_size == 0)
19052 else if (expected_size > REG_BR_PROB_BASE)
19053 predict_jump (REG_BR_PROB_BASE - 1);
19055 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
19058 predict_jump (REG_BR_PROB_BASE * 80 / 100);
19059 iter = ix86_zero_extend_to_Pmode (iter);
19060 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
19061 true, OPTAB_LIB_WIDEN);
19062 if (tmp != destptr)
19063 emit_move_insn (destptr, tmp);
19066 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
19067 true, OPTAB_LIB_WIDEN);
19069 emit_move_insn (srcptr, tmp);
19071 emit_label (out_label);
19074 /* Output a "rep; mov" instruction.
19075 Arguments have the same meaning as for the previous function. */
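/* Sketch: for a chunk MODE wider than a byte the byte count is scaled
   down first, so a known 32-byte SImode copy becomes, roughly,
   ecx = 32 / 4 followed by rep movsl.  The DESTEXP/SRCEXP expressions
   built below encode the final pointer values,
   ptr + (count << log2 (chunk size)), for the RTL pattern.  */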
19077 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
19078 rtx destptr, rtx srcptr,
19080 enum machine_mode mode)
19086 /* If the size is known, it is shorter to use rep movs. */
19087 if (mode == QImode && CONST_INT_P (count)
19088 && !(INTVAL (count) & 3))
19091 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19092 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19093 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
19094 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
19095 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19096 if (mode != QImode)
19098 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19099 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19100 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19101 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
19102 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19103 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
19107 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19108 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
19110 if (CONST_INT_P (count))
19112 count = GEN_INT (INTVAL (count)
19113 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19114 destmem = shallow_copy_rtx (destmem);
19115 srcmem = shallow_copy_rtx (srcmem);
19116 set_mem_size (destmem, count);
19117 set_mem_size (srcmem, count);
19121 if (MEM_SIZE (destmem))
19122 set_mem_size (destmem, NULL_RTX);
19123 if (MEM_SIZE (srcmem))
19124 set_mem_size (srcmem, NULL_RTX);
19126 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
19130 /* Output a "rep; stos" instruction.
19131 Arguments have the same meaning as for the previous function. */
19133 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
19134 rtx count, enum machine_mode mode,
19140 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
19141 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
19142 value = force_reg (mode, gen_lowpart (mode, value));
19143 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
19144 if (mode != QImode)
19146 destexp = gen_rtx_ASHIFT (Pmode, countreg,
19147 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
19148 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
19151 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
19152 if (orig_value == const0_rtx && CONST_INT_P (count))
19154 count = GEN_INT (INTVAL (count)
19155 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
19156 destmem = shallow_copy_rtx (destmem);
19157 set_mem_size (destmem, count);
19159 else if (MEM_SIZE (destmem))
19160 set_mem_size (destmem, NULL_RTX);
19161 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
19165 emit_strmov (rtx destmem, rtx srcmem,
19166 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
19168 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
19169 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
19170 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19173 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
19175 expand_movmem_epilogue (rtx destmem, rtx srcmem,
19176 rtx destptr, rtx srcptr, rtx count, int max_size)
19179 if (CONST_INT_P (count))
19181 HOST_WIDE_INT countval = INTVAL (count);
19184 if ((countval & 0x10) && max_size > 16)
19188 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19189 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
19192 gcc_unreachable ();
19195 if ((countval & 0x08) && max_size > 8)
19198 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
19201 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19202 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
19206 if ((countval & 0x04) && max_size > 4)
19208 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
19211 if ((countval & 0x02) && max_size > 2)
19213 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
19216 if ((countval & 0x01) && max_size > 1)
19218 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
19225 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
19226 count, 1, OPTAB_DIRECT);
19227 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
19228 count, QImode, 1, 4);
19232 /* When there are stringops, we can cheaply increase dest and src pointers.
19233 Otherwise we save code size by maintaining an offset (zero is readily
19234 available from the preceding rep operation) and using x86 addressing modes. */
19236 if (TARGET_SINGLE_STRINGOP)
19240 rtx label = ix86_expand_aligntest (count, 4, true);
19241 src = change_address (srcmem, SImode, srcptr);
19242 dest = change_address (destmem, SImode, destptr);
19243 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19244 emit_label (label);
19245 LABEL_NUSES (label) = 1;
19249 rtx label = ix86_expand_aligntest (count, 2, true);
19250 src = change_address (srcmem, HImode, srcptr);
19251 dest = change_address (destmem, HImode, destptr);
19252 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19253 emit_label (label);
19254 LABEL_NUSES (label) = 1;
19258 rtx label = ix86_expand_aligntest (count, 1, true);
19259 src = change_address (srcmem, QImode, srcptr);
19260 dest = change_address (destmem, QImode, destptr);
19261 emit_insn (gen_strmov (destptr, dest, srcptr, src));
19262 emit_label (label);
19263 LABEL_NUSES (label) = 1;
19268 rtx offset = force_reg (Pmode, const0_rtx);
19273 rtx label = ix86_expand_aligntest (count, 4, true);
19274 src = change_address (srcmem, SImode, srcptr);
19275 dest = change_address (destmem, SImode, destptr);
19276 emit_move_insn (dest, src);
19277 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
19278 true, OPTAB_LIB_WIDEN);
19280 emit_move_insn (offset, tmp);
19281 emit_label (label);
19282 LABEL_NUSES (label) = 1;
19286 rtx label = ix86_expand_aligntest (count, 2, true);
19287 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19288 src = change_address (srcmem, HImode, tmp);
19289 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19290 dest = change_address (destmem, HImode, tmp);
19291 emit_move_insn (dest, src);
19292 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
19293 true, OPTAB_LIB_WIDEN);
19295 emit_move_insn (offset, tmp);
19296 emit_label (label);
19297 LABEL_NUSES (label) = 1;
19301 rtx label = ix86_expand_aligntest (count, 1, true);
19302 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
19303 src = change_address (srcmem, QImode, tmp);
19304 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
19305 dest = change_address (destmem, QImode, tmp);
19306 emit_move_insn (dest, src);
19307 emit_label (label);
19308 LABEL_NUSES (label) = 1;
19313 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19315 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
19316 rtx count, int max_size)
19319 expand_simple_binop (counter_mode (count), AND, count,
19320 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
19321 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
19322 gen_lowpart (QImode, value), count, QImode,
19326 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
19328 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
19332 if (CONST_INT_P (count))
19334 HOST_WIDE_INT countval = INTVAL (count);
19337 if ((countval & 0x10) && max_size > 16)
19341 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19342 emit_insn (gen_strset (destptr, dest, value));
19343 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
19344 emit_insn (gen_strset (destptr, dest, value));
19347 gcc_unreachable ();
19350 if ((countval & 0x08) && max_size > 8)
19354 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
19355 emit_insn (gen_strset (destptr, dest, value));
19359 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19360 emit_insn (gen_strset (destptr, dest, value));
19361 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
19362 emit_insn (gen_strset (destptr, dest, value));
19366 if ((countval & 0x04) && max_size > 4)
19368 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
19369 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19372 if ((countval & 0x02) && max_size > 2)
19374 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
19375 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19378 if ((countval & 0x01) && max_size > 1)
19380 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
19381 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19388 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
19393 rtx label = ix86_expand_aligntest (count, 16, true);
19396 dest = change_address (destmem, DImode, destptr);
19397 emit_insn (gen_strset (destptr, dest, value));
19398 emit_insn (gen_strset (destptr, dest, value));
19402 dest = change_address (destmem, SImode, destptr);
19403 emit_insn (gen_strset (destptr, dest, value));
19404 emit_insn (gen_strset (destptr, dest, value));
19405 emit_insn (gen_strset (destptr, dest, value));
19406 emit_insn (gen_strset (destptr, dest, value));
19408 emit_label (label);
19409 LABEL_NUSES (label) = 1;
19413 rtx label = ix86_expand_aligntest (count, 8, true);
19416 dest = change_address (destmem, DImode, destptr);
19417 emit_insn (gen_strset (destptr, dest, value));
19421 dest = change_address (destmem, SImode, destptr);
19422 emit_insn (gen_strset (destptr, dest, value));
19423 emit_insn (gen_strset (destptr, dest, value));
19425 emit_label (label);
19426 LABEL_NUSES (label) = 1;
19430 rtx label = ix86_expand_aligntest (count, 4, true);
19431 dest = change_address (destmem, SImode, destptr);
19432 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
19433 emit_label (label);
19434 LABEL_NUSES (label) = 1;
19438 rtx label = ix86_expand_aligntest (count, 2, true);
19439 dest = change_address (destmem, HImode, destptr);
19440 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
19441 emit_label (label);
19442 LABEL_NUSES (label) = 1;
19446 rtx label = ix86_expand_aligntest (count, 1, true);
19447 dest = change_address (destmem, QImode, destptr);
19448 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
19449 emit_label (label);
19450 LABEL_NUSES (label) = 1;
19454 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
19455 DESIRED_ALIGNMENT. */
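/* Illustrative flow for ALIGN == 1 and DESIRED_ALIGNMENT == 8: test
   bit 0 of the destination pointer and copy one byte if it is set,
   then bit 1 and copy a halfword, then bit 2 and copy a word; after
   that the destination is 8-byte aligned and COUNT has been adjusted
   by each copied amount.  */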
19457 expand_movmem_prologue (rtx destmem, rtx srcmem,
19458 rtx destptr, rtx srcptr, rtx count,
19459 int align, int desired_alignment)
19461 if (align <= 1 && desired_alignment > 1)
19463 rtx label = ix86_expand_aligntest (destptr, 1, false);
19464 srcmem = change_address (srcmem, QImode, srcptr);
19465 destmem = change_address (destmem, QImode, destptr);
19466 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19467 ix86_adjust_counter (count, 1);
19468 emit_label (label);
19469 LABEL_NUSES (label) = 1;
19471 if (align <= 2 && desired_alignment > 2)
19473 rtx label = ix86_expand_aligntest (destptr, 2, false);
19474 srcmem = change_address (srcmem, HImode, srcptr);
19475 destmem = change_address (destmem, HImode, destptr);
19476 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19477 ix86_adjust_counter (count, 2);
19478 emit_label (label);
19479 LABEL_NUSES (label) = 1;
19481 if (align <= 4 && desired_alignment > 4)
19483 rtx label = ix86_expand_aligntest (destptr, 4, false);
19484 srcmem = change_address (srcmem, SImode, srcptr);
19485 destmem = change_address (destmem, SImode, destptr);
19486 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
19487 ix86_adjust_counter (count, 4);
19488 emit_label (label);
19489 LABEL_NUSES (label) = 1;
19491 gcc_assert (desired_alignment <= 8);
19494 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
19495 ALIGN_BYTES is how many bytes need to be copied. */
19497 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
19498 int desired_align, int align_bytes)
19501 rtx src_size, dst_size;
19503 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
19504 if (src_align_bytes >= 0)
19505 src_align_bytes = desired_align - src_align_bytes;
19506 src_size = MEM_SIZE (src);
19507 dst_size = MEM_SIZE (dst);
19508 if (align_bytes & 1)
19510 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19511 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
19513 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19515 if (align_bytes & 2)
19517 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19518 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
19519 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19520 set_mem_align (dst, 2 * BITS_PER_UNIT);
19521 if (src_align_bytes >= 0
19522 && (src_align_bytes & 1) == (align_bytes & 1)
19523 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
19524 set_mem_align (src, 2 * BITS_PER_UNIT);
19526 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19528 if (align_bytes & 4)
19530 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19531 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
19532 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19533 set_mem_align (dst, 4 * BITS_PER_UNIT);
19534 if (src_align_bytes >= 0)
19536 unsigned int src_align = 0;
19537 if ((src_align_bytes & 3) == (align_bytes & 3))
19539 else if ((src_align_bytes & 1) == (align_bytes & 1))
19541 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19542 set_mem_align (src, src_align * BITS_PER_UNIT);
19545 emit_insn (gen_strmov (destreg, dst, srcreg, src));
19547 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19548 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
19549 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19550 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19551 if (src_align_bytes >= 0)
19553 unsigned int src_align = 0;
19554 if ((src_align_bytes & 7) == (align_bytes & 7))
19556 else if ((src_align_bytes & 3) == (align_bytes & 3))
19558 else if ((src_align_bytes & 1) == (align_bytes & 1))
19560 if (src_align > (unsigned int) desired_align)
19561 src_align = desired_align;
19562 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
19563 set_mem_align (src, src_align * BITS_PER_UNIT);
19566 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19568 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
19573 /* Store enough into DEST to align DEST, known to be aligned by ALIGN, to
19574 DESIRED_ALIGNMENT. */
19576 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
19577 int align, int desired_alignment)
19579 if (align <= 1 && desired_alignment > 1)
19581 rtx label = ix86_expand_aligntest (destptr, 1, false);
19582 destmem = change_address (destmem, QImode, destptr);
19583 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
19584 ix86_adjust_counter (count, 1);
19585 emit_label (label);
19586 LABEL_NUSES (label) = 1;
19588 if (align <= 2 && desired_alignment > 2)
19590 rtx label = ix86_expand_aligntest (destptr, 2, false);
19591 destmem = change_address (destmem, HImode, destptr);
19592 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
19593 ix86_adjust_counter (count, 2);
19594 emit_label (label);
19595 LABEL_NUSES (label) = 1;
19597 if (align <= 4 && desired_alignment > 4)
19599 rtx label = ix86_expand_aligntest (destptr, 4, false);
19600 destmem = change_address (destmem, SImode, destptr);
19601 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19602 ix86_adjust_counter (count, 4);
19603 emit_label (label);
19604 LABEL_NUSES (label) = 1;
19606 gcc_assert (desired_alignment <= 8);
19609 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
19610 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19612 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19613 int desired_align, int align_bytes)
19616 rtx dst_size = MEM_SIZE (dst);
19617 if (align_bytes & 1)
19619 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19621 emit_insn (gen_strset (destreg, dst,
19622 gen_lowpart (QImode, value)));
19624 if (align_bytes & 2)
19626 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19627 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19628 set_mem_align (dst, 2 * BITS_PER_UNIT);
19630 emit_insn (gen_strset (destreg, dst,
19631 gen_lowpart (HImode, value)));
19633 if (align_bytes & 4)
19635 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19636 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19637 set_mem_align (dst, 4 * BITS_PER_UNIT);
19639 emit_insn (gen_strset (destreg, dst,
19640 gen_lowpart (SImode, value)));
19642 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19643 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19644 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19646 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19650 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19651 static enum stringop_alg
19652 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19653 int *dynamic_check)
19655 const struct stringop_algs * algs;
19656 bool optimize_for_speed;
19657 /* Algorithms using the rep prefix want at least edi and ecx;
19658 additionally, memset wants eax and memcpy wants esi. Don't
19659 consider such algorithms if the user has appropriated those
19660 registers for their own purposes. */
19661 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19663 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19665 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19666 || (alg != rep_prefix_1_byte \
19667 && alg != rep_prefix_4_byte \
19668 && alg != rep_prefix_8_byte))
19669 const struct processor_costs *cost;
19671 /* Even if the string operation call is cold, we still might spend a lot
19672 of time processing large blocks. */
19673 if (optimize_function_for_size_p (cfun)
19674 || (optimize_insn_for_size_p ()
19675 && expected_size != -1 && expected_size < 256))
19676 optimize_for_speed = false;
19678 optimize_for_speed = true;
19680 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19682 *dynamic_check = -1;
19684 algs = &cost->memset[TARGET_64BIT != 0];
19686 algs = &cost->memcpy[TARGET_64BIT != 0];
19687 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19688 return stringop_alg;
19689 /* rep; movq or rep; movl is the smallest variant. */
19690 else if (!optimize_for_speed)
19692 if (!count || (count & 3))
19693 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19695 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19697 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
19699 else if (expected_size != -1 && expected_size < 4)
19700 return loop_1_byte;
19701 else if (expected_size != -1)
19704 enum stringop_alg alg = libcall;
19705 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19707 /* We get here if the algorithms that were not libcall-based
19708 were rep-prefix based and we are unable to use rep prefixes
19709 based on global register usage. Break out of the loop and
19710 use the heuristic below. */
19711 if (algs->size[i].max == 0)
19713 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19715 enum stringop_alg candidate = algs->size[i].alg;
19717 if (candidate != libcall && ALG_USABLE_P (candidate))
19719 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
19720 last non-libcall inline algorithm. */
19721 if (TARGET_INLINE_ALL_STRINGOPS)
19723 /* When the current size is best copied by a libcall,
19724 but we are still forced to inline, run the heuristic below
19725 that will pick code for medium-sized blocks. */
19726 if (alg != libcall)
19730 else if (ALG_USABLE_P (candidate))
19734 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19736 /* When asked to inline the call anyway, try to pick a meaningful choice.
19737 We look for the maximal size of block that is faster to copy by hand and
19738 take blocks of at most that size, guessing that the average size will
19739 be roughly half of the block.
19741 If this turns out to be bad, we might simply specify the preferred
19742 choice in ix86_costs. */
19743 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19744 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19747 enum stringop_alg alg;
19749 bool any_alg_usable_p = true;
19751 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19753 enum stringop_alg candidate = algs->size[i].alg;
19754 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19756 if (candidate != libcall && candidate
19757 && ALG_USABLE_P (candidate))
19758 max = algs->size[i].max;
19760 /* If there aren't any usable algorithms, then recursing on
19761 smaller sizes isn't going to find anything. Just return the
19762 simple byte-at-a-time copy loop. */
19763 if (!any_alg_usable_p)
19765 /* Pick something reasonable. */
19766 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19767 *dynamic_check = 128;
19768 return loop_1_byte;
19772 alg = decide_alg (count, max / 2, memset, dynamic_check);
19773 gcc_assert (*dynamic_check == -1);
19774 gcc_assert (alg != libcall);
19775 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19776 *dynamic_check = max;
19779 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19780 #undef ALG_USABLE_P
19783 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19784 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19786 decide_alignment (int align,
19787 enum stringop_alg alg,
19790 int desired_align = 0;
19794 gcc_unreachable ();
19796 case unrolled_loop:
19797 desired_align = GET_MODE_SIZE (Pmode);
19799 case rep_prefix_8_byte:
19802 case rep_prefix_4_byte:
19803 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19804 copying a whole cache line at once. */
19805 if (TARGET_PENTIUMPRO)
19810 case rep_prefix_1_byte:
19811 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
19812 copying a whole cache line at once. */
19813 if (TARGET_PENTIUMPRO)
19827 if (desired_align < align)
19828 desired_align = align;
19829 if (expected_size != -1 && expected_size < 4)
19830 desired_align = align;
19831 return desired_align;
19834 /* Return the smallest power of 2 greater than VAL. */
19836 smallest_pow2_greater_than (int val)
19844 /* Expand string move (memcpy) operation. Use i386 string operations when
19845 profitable. expand_setmem contains similar code. The code depends upon
19846 architecture, block size and alignment, but always has the same overall structure:
19849 1) Prologue guard: Conditional that jumps up to epilogues for small
19850 blocks that can be handled by epilogue alone. This is faster but
19851 also needed for correctness, since the prologue assumes the block is larger
19852 than the desired alignment.
19854 An optional dynamic check for size, and a libcall for large
19855 blocks, is emitted here too, with -minline-stringops-dynamically.
19857 2) Prologue: copy the first few bytes in order to get the destination
19858 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19859 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19860 We emit either a jump tree for power-of-two-sized blocks, or a byte loop.
19862 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19863 with the specified algorithm.
19865 4) Epilogue: code copying the tail of the block that is too small to be
19866 handled by the main body (or up to the size guarded by the prologue guard). */
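/* A minimal plain-C sketch of the four steps above, assuming <string.h>
   and <stdint.h>; the names and the word-sized chunk are made up and only
   illustrate the structure, they are not GCC's emitted code.  */
static void
inline_memcpy_sketch (char *dst, const char *src, size_t n)
{
  size_t chunk = sizeof (long);		/* stands in for SIZE_NEEDED */
  /* 1) Prologue guard: small blocks go straight to the epilogue.  */
  if (n >= chunk)
    {
      /* 2) Alignment prologue: byte copies until DST is aligned.  */
      while (((uintptr_t) dst % chunk) != 0)
	{
	  *dst++ = *src++;
	  n--;
	}
      /* 3) Main body: copy in chunk-sized steps.  */
      for (; n >= chunk; n -= chunk, dst += chunk, src += chunk)
	memcpy (dst, src, chunk);
    }
  /* 4) Epilogue: tail smaller than one chunk.  */
  while (n > 0)
    {
      *dst++ = *src++;
      n--;
    }
}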
19869 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19870 rtx expected_align_exp, rtx expected_size_exp)
19876 rtx jump_around_label = NULL;
19877 HOST_WIDE_INT align = 1;
19878 unsigned HOST_WIDE_INT count = 0;
19879 HOST_WIDE_INT expected_size = -1;
19880 int size_needed = 0, epilogue_size_needed;
19881 int desired_align = 0, align_bytes = 0;
19882 enum stringop_alg alg;
19884 bool need_zero_guard = false;
19886 if (CONST_INT_P (align_exp))
19887 align = INTVAL (align_exp);
19888 /* i386 can do misaligned access at reasonably increased cost. */
19889 if (CONST_INT_P (expected_align_exp)
19890 && INTVAL (expected_align_exp) > align)
19891 align = INTVAL (expected_align_exp);
19892 /* ALIGN is the minimum of destination and source alignment, but we care here
19893 just about destination alignment. */
19894 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19895 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19897 if (CONST_INT_P (count_exp))
19898 count = expected_size = INTVAL (count_exp);
19899 if (CONST_INT_P (expected_size_exp) && count == 0)
19900 expected_size = INTVAL (expected_size_exp);
19902 /* Make sure we don't need to care about overflow later on. */
19903 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19906 /* Step 0: Decide on preferred algorithm, desired alignment and
19907 size of chunks to be copied by main loop. */
19909 alg = decide_alg (count, expected_size, false, &dynamic_check);
19910 desired_align = decide_alignment (align, alg, expected_size);
19912 if (!TARGET_ALIGN_STRINGOPS)
19913 align = desired_align;
19915 if (alg == libcall)
19917 gcc_assert (alg != no_stringop);
19919 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19920 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19921 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19926 gcc_unreachable ();
19928 need_zero_guard = true;
19929 size_needed = GET_MODE_SIZE (Pmode);
19931 case unrolled_loop:
19932 need_zero_guard = true;
19933 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19935 case rep_prefix_8_byte:
19938 case rep_prefix_4_byte:
19941 case rep_prefix_1_byte:
19945 need_zero_guard = true;
19950 epilogue_size_needed = size_needed;
19952 /* Step 1: Prologue guard. */
19954 /* Alignment code needs the count to be in a register. */
19955 if (CONST_INT_P (count_exp) && desired_align > align)
19957 if (INTVAL (count_exp) > desired_align
19958 && INTVAL (count_exp) > size_needed)
19961 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19962 if (align_bytes <= 0)
19965 align_bytes = desired_align - align_bytes;
19967 if (align_bytes == 0)
19968 count_exp = force_reg (counter_mode (count_exp), count_exp);
19970 gcc_assert (desired_align >= 1 && align >= 1);
19972 /* Ensure that the alignment prologue won't copy past the end of the block. */
19973 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19975 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19976 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19977 Make sure it is a power of 2. */
19978 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19982 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19984 /* If the main algorithm works on QImode, no epilogue is needed.
19985 For small sizes just don't align anything. */
19986 if (size_needed == 1)
19987 desired_align = align;
19994 label = gen_label_rtx ();
19995 emit_cmp_and_jump_insns (count_exp,
19996 GEN_INT (epilogue_size_needed),
19997 LTU, 0, counter_mode (count_exp), 1, label);
19998 if (expected_size == -1 || expected_size < epilogue_size_needed)
19999 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20001 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20005 /* Emit code to decide at runtime whether a library call or inline code should be used. */
20007 if (dynamic_check != -1)
20009 if (CONST_INT_P (count_exp))
20011 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
20013 emit_block_move_via_libcall (dst, src, count_exp, false);
20014 count_exp = const0_rtx;
20020 rtx hot_label = gen_label_rtx ();
20021 jump_around_label = gen_label_rtx ();
20022 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20023 LEU, 0, GET_MODE (count_exp), 1, hot_label);
20024 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20025 emit_block_move_via_libcall (dst, src, count_exp, false);
20026 emit_jump (jump_around_label);
20027 emit_label (hot_label);
20031 /* Step 2: Alignment prologue. */
20033 if (desired_align > align)
20035 if (align_bytes == 0)
20037 /* Except for the first move in epilogue, we no longer know the
20038 constant offset in aliasing info. It doesn't seem worth the pain
20039 to maintain it for the first move, so throw away the info early. */
20041 src = change_address (src, BLKmode, srcreg);
20042 dst = change_address (dst, BLKmode, destreg);
20043 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
20048 /* If we know how many bytes need to be stored before dst is
20049 sufficiently aligned, maintain aliasing info accurately. */
20050 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
20051 desired_align, align_bytes);
20052 count_exp = plus_constant (count_exp, -align_bytes);
20053 count -= align_bytes;
20055 if (need_zero_guard
20056 && (count < (unsigned HOST_WIDE_INT) size_needed
20057 || (align_bytes == 0
20058 && count < ((unsigned HOST_WIDE_INT) size_needed
20059 + desired_align - align))))
20061 /* It is possible that we copied enough so that the main loop will not execute. */
20063 gcc_assert (size_needed > 1);
20064 if (label == NULL_RTX)
20065 label = gen_label_rtx ();
20066 emit_cmp_and_jump_insns (count_exp,
20067 GEN_INT (size_needed),
20068 LTU, 0, counter_mode (count_exp), 1, label);
20069 if (expected_size == -1
20070 || expected_size < (desired_align - align) / 2 + size_needed)
20071 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20073 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20076 if (label && size_needed == 1)
20078 emit_label (label);
20079 LABEL_NUSES (label) = 1;
20081 epilogue_size_needed = 1;
20083 else if (label == NULL_RTX)
20084 epilogue_size_needed = size_needed;
20086 /* Step 3: Main loop. */
20092 gcc_unreachable ();
20094 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20095 count_exp, QImode, 1, expected_size);
20098 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20099 count_exp, Pmode, 1, expected_size);
20101 case unrolled_loop:
20102 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
20103 registers for 4 temporaries anyway. */
20104 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
20105 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
20108 case rep_prefix_8_byte:
20109 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20112 case rep_prefix_4_byte:
20113 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20116 case rep_prefix_1_byte:
20117 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
20121 /* Properly adjust the offsets of src and dest memory for aliasing. */
20122 if (CONST_INT_P (count_exp))
20124 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
20125 (count / size_needed) * size_needed);
20126 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20127 (count / size_needed) * size_needed);
20131 src = change_address (src, BLKmode, srcreg);
20132 dst = change_address (dst, BLKmode, destreg);
20135 /* Step 4: Epilogue to copy the remaining bytes. */
20139 /* When the main loop is done, COUNT_EXP might hold the original count,
20140 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20141 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20142 bytes. Compensate if needed. */
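/* For example, with SIZE_NEEDED == 8 an original count of 27 leaves
   27 & 7 == 3 bytes for the epilogue (an illustration only).  */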
20144 if (size_needed < epilogue_size_needed)
20147 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20148 GEN_INT (size_needed - 1), count_exp, 1,
20150 if (tmp != count_exp)
20151 emit_move_insn (count_exp, tmp);
20153 emit_label (label);
20154 LABEL_NUSES (label) = 1;
20157 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20158 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
20159 epilogue_size_needed);
20160 if (jump_around_label)
20161 emit_label (jump_around_label);
20165 /* Helper function for memset. For QImode value 0xXY produce
20166 0xXYXYXYXY of the width specified by MODE. This is essentially
20167 a * 0x01010101, but we can do slightly better than
20168 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
20171 promote_duplicated_reg (enum machine_mode mode, rtx val)
20173 enum machine_mode valmode = GET_MODE (val);
20175 int nops = mode == DImode ? 3 : 2;
20177 gcc_assert (mode == SImode || mode == DImode);
20178 if (val == const0_rtx)
20179 return copy_to_mode_reg (mode, const0_rtx);
20180 if (CONST_INT_P (val))
20182 HOST_WIDE_INT v = INTVAL (val) & 255;
20186 if (mode == DImode)
20187 v |= (v << 16) << 16;
20188 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
20191 if (valmode == VOIDmode)
20193 if (valmode != QImode)
20194 val = gen_lowpart (QImode, val);
20195 if (mode == QImode)
20197 if (!TARGET_PARTIAL_REG_STALL)
20199 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
20200 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
20201 <= (ix86_cost->shift_const + ix86_cost->add) * nops
20202 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
20204 rtx reg = convert_modes (mode, QImode, val, true);
20205 tmp = promote_duplicated_reg (mode, const1_rtx);
20206 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
20211 rtx reg = convert_modes (mode, QImode, val, true);
20213 if (!TARGET_PARTIAL_REG_STALL)
20214 if (mode == SImode)
20215 emit_insn (gen_movsi_insv_1 (reg, reg));
20217 emit_insn (gen_movdi_insv_1 (reg, reg));
20220 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
20221 NULL, 1, OPTAB_DIRECT);
20223 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20225 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
20226 NULL, 1, OPTAB_DIRECT);
20227 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
20228 if (mode == SImode)
20230 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
20231 NULL, 1, OPTAB_DIRECT);
20232 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
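/* A stand-alone sketch (not part of GCC) of the value computed by
   promote_duplicated_reg above for the SImode case: the shift/or
   unwinding and the multiply agree.  */
static unsigned int
duplicate_byte_sketch (unsigned char val)
{
  unsigned int x = val;
  x |= x << 8;			/* 0x0000XYXY */
  x |= x << 16;			/* 0xXYXYXYXY == val * 0x01010101U */
  return x;
}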
20237 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
20238 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
20239 alignment from ALIGN to DESIRED_ALIGN. */
20241 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
20246 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
20247 promoted_val = promote_duplicated_reg (DImode, val);
20248 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
20249 promoted_val = promote_duplicated_reg (SImode, val);
20250 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
20251 promoted_val = promote_duplicated_reg (HImode, val);
20253 promoted_val = val;
20255 return promoted_val;
20258 /* Expand string set operation (memset). Use i386 string operations when
20259 profitable. See the expand_movmem comment for an explanation of the
20260 individual steps performed. */
20262 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
20263 rtx expected_align_exp, rtx expected_size_exp)
20268 rtx jump_around_label = NULL;
20269 HOST_WIDE_INT align = 1;
20270 unsigned HOST_WIDE_INT count = 0;
20271 HOST_WIDE_INT expected_size = -1;
20272 int size_needed = 0, epilogue_size_needed;
20273 int desired_align = 0, align_bytes = 0;
20274 enum stringop_alg alg;
20275 rtx promoted_val = NULL;
20276 bool force_loopy_epilogue = false;
20278 bool need_zero_guard = false;
20280 if (CONST_INT_P (align_exp))
20281 align = INTVAL (align_exp);
20282 /* i386 can do misaligned access at reasonably increased cost. */
20283 if (CONST_INT_P (expected_align_exp)
20284 && INTVAL (expected_align_exp) > align)
20285 align = INTVAL (expected_align_exp);
20286 if (CONST_INT_P (count_exp))
20287 count = expected_size = INTVAL (count_exp);
20288 if (CONST_INT_P (expected_size_exp) && count == 0)
20289 expected_size = INTVAL (expected_size_exp);
20291 /* Make sure we don't need to care about overflow later on. */
20292 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
20295 /* Step 0: Decide on preferred algorithm, desired alignment and
20296 size of chunks to be copied by main loop. */
20298 alg = decide_alg (count, expected_size, true, &dynamic_check);
20299 desired_align = decide_alignment (align, alg, expected_size);
20301 if (!TARGET_ALIGN_STRINGOPS)
20302 align = desired_align;
20304 if (alg == libcall)
20306 gcc_assert (alg != no_stringop);
20308 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
20309 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
20314 gcc_unreachable ();
20316 need_zero_guard = true;
20317 size_needed = GET_MODE_SIZE (Pmode);
20319 case unrolled_loop:
20320 need_zero_guard = true;
20321 size_needed = GET_MODE_SIZE (Pmode) * 4;
20323 case rep_prefix_8_byte:
20326 case rep_prefix_4_byte:
20329 case rep_prefix_1_byte:
20333 need_zero_guard = true;
20337 epilogue_size_needed = size_needed;
20339 /* Step 1: Prologue guard. */
20341 /* Alignment code needs the count to be in a register. */
20342 if (CONST_INT_P (count_exp) && desired_align > align)
20344 if (INTVAL (count_exp) > desired_align
20345 && INTVAL (count_exp) > size_needed)
20348 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
20349 if (align_bytes <= 0)
20352 align_bytes = desired_align - align_bytes;
20354 if (align_bytes == 0)
20356 enum machine_mode mode = SImode;
20357 if (TARGET_64BIT && (count & ~0xffffffff))
20359 count_exp = force_reg (mode, count_exp);
20362 /* Do the cheap promotion to allow better CSE across the
20363 main loop and epilogue (i.e. one load of the big constant at
20364 the front of all the code). */
20365 if (CONST_INT_P (val_exp))
20366 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20367 desired_align, align);
20368 /* Ensure that the alignment prologue won't copy past the end of the block. */
20369 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
20371 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
20372 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
20373 Make sure it is power of 2. */
20374 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
20376 /* To improve performance of small blocks, we jump around the VAL
20377 promoting code. This means that if the promoted VAL is not constant,
20378 we might not use it in the epilogue and have to use a byte loop instead. */
20380 if (epilogue_size_needed > 2 && !promoted_val)
20381 force_loopy_epilogue = true;
20384 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
20386 /* If the main algorithm works on QImode, no epilogue is needed.
20387 For small sizes just don't align anything. */
20388 if (size_needed == 1)
20389 desired_align = align;
20396 label = gen_label_rtx ();
20397 emit_cmp_and_jump_insns (count_exp,
20398 GEN_INT (epilogue_size_needed),
20399 LTU, 0, counter_mode (count_exp), 1, label);
20400 if (expected_size == -1 || expected_size <= epilogue_size_needed)
20401 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20403 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20406 if (dynamic_check != -1)
20408 rtx hot_label = gen_label_rtx ();
20409 jump_around_label = gen_label_rtx ();
20410 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
20411 LEU, 0, counter_mode (count_exp), 1, hot_label);
20412 predict_jump (REG_BR_PROB_BASE * 90 / 100);
20413 set_storage_via_libcall (dst, count_exp, val_exp, false);
20414 emit_jump (jump_around_label);
20415 emit_label (hot_label);
20418 /* Step 2: Alignment prologue. */
20420 /* Do the expensive promotion once we have branched off the small blocks. */
20422 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
20423 desired_align, align);
20424 gcc_assert (desired_align >= 1 && align >= 1);
20426 if (desired_align > align)
20428 if (align_bytes == 0)
20430 /* Except for the first move in epilogue, we no longer know the
20431 constant offset in aliasing info. It doesn't seem worth the pain
20432 to maintain it for the first move, so throw away the info early. */
20434 dst = change_address (dst, BLKmode, destreg);
20435 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
20440 /* If we know how many bytes need to be stored before dst is
20441 sufficiently aligned, maintain aliasing info accurately. */
20442 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
20443 desired_align, align_bytes);
20444 count_exp = plus_constant (count_exp, -align_bytes);
20445 count -= align_bytes;
20447 if (need_zero_guard
20448 && (count < (unsigned HOST_WIDE_INT) size_needed
20449 || (align_bytes == 0
20450 && count < ((unsigned HOST_WIDE_INT) size_needed
20451 + desired_align - align))))
20453 /* It is possible that we copied enough so that the main loop will not execute. */
20455 gcc_assert (size_needed > 1);
20456 if (label == NULL_RTX)
20457 label = gen_label_rtx ();
20458 emit_cmp_and_jump_insns (count_exp,
20459 GEN_INT (size_needed),
20460 LTU, 0, counter_mode (count_exp), 1, label);
20461 if (expected_size == -1
20462 || expected_size < (desired_align - align) / 2 + size_needed)
20463 predict_jump (REG_BR_PROB_BASE * 20 / 100);
20465 predict_jump (REG_BR_PROB_BASE * 60 / 100);
20468 if (label && size_needed == 1)
20470 emit_label (label);
20471 LABEL_NUSES (label) = 1;
20473 promoted_val = val_exp;
20474 epilogue_size_needed = 1;
20476 else if (label == NULL_RTX)
20477 epilogue_size_needed = size_needed;
20479 /* Step 3: Main loop. */
20485 gcc_unreachable ();
20487 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20488 count_exp, QImode, 1, expected_size);
20491 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20492 count_exp, Pmode, 1, expected_size);
20494 case unrolled_loop:
20495 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
20496 count_exp, Pmode, 4, expected_size);
20498 case rep_prefix_8_byte:
20499 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20502 case rep_prefix_4_byte:
20503 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20506 case rep_prefix_1_byte:
20507 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
20511 /* Properly adjust the offset of the dest memory for aliasing. */
20512 if (CONST_INT_P (count_exp))
20513 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
20514 (count / size_needed) * size_needed);
20516 dst = change_address (dst, BLKmode, destreg);
20518 /* Step 4: Epilogue to copy the remaining bytes. */
20522 /* When the main loop is done, COUNT_EXP might hold the original count,
20523 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
20524 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
20525 bytes. Compensate if needed. */
20527 if (size_needed < epilogue_size_needed)
20530 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
20531 GEN_INT (size_needed - 1), count_exp, 1,
20533 if (tmp != count_exp)
20534 emit_move_insn (count_exp, tmp);
20536 emit_label (label);
20537 LABEL_NUSES (label) = 1;
20540 if (count_exp != const0_rtx && epilogue_size_needed > 1)
20542 if (force_loopy_epilogue)
20543 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
20544 epilogue_size_needed);
20546 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
20547 epilogue_size_needed);
20549 if (jump_around_label)
20550 emit_label (jump_around_label);
20554 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb:
20557 out = result, initialized with the start address
20558 align_rtx = alignment of the address.
20559 scratch = scratch register, initialized with the start address when
20560 not aligned, otherwise undefined
20562 This is just the body. It needs the initializations mentioned above and
20563 some address computing at the end. These things are done in i386.md. */
20566 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
20570 rtx align_2_label = NULL_RTX;
20571 rtx align_3_label = NULL_RTX;
20572 rtx align_4_label = gen_label_rtx ();
20573 rtx end_0_label = gen_label_rtx ();
20575 rtx tmpreg = gen_reg_rtx (SImode);
20576 rtx scratch = gen_reg_rtx (SImode);
20580 if (CONST_INT_P (align_rtx))
20581 align = INTVAL (align_rtx);
20583 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
20585 /* Is there a known alignment and is it less than 4? */
20588 rtx scratch1 = gen_reg_rtx (Pmode);
20589 emit_move_insn (scratch1, out);
20590 /* Is there a known alignment and is it not 2? */
20593 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
20594 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
20596 /* Leave just the 3 lower bits. */
20597 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20598 NULL_RTX, 0, OPTAB_WIDEN);
20600 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20601 Pmode, 1, align_4_label);
20602 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20603 Pmode, 1, align_2_label);
20604 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20605 Pmode, 1, align_3_label);
20609 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20610 check whether it is aligned to a 4-byte boundary. */
20612 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20613 NULL_RTX, 0, OPTAB_WIDEN);
20615 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20616 Pmode, 1, align_4_label);
20619 mem = change_address (src, QImode, out);
20621 /* Now compare the bytes. */
20623 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
20624 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20625 QImode, 1, end_0_label);
20627 /* Increment the address. */
20628 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20630 /* Not needed with an alignment of 2 */
20633 emit_label (align_2_label);
20635 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20638 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20640 emit_label (align_3_label);
20643 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20646 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20649 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
20650 align this loop; it only enlarges the code and does not speed it up. */
20652 emit_label (align_4_label);
20654 mem = change_address (src, SImode, out);
20655 emit_move_insn (scratch, mem);
20656 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20658 /* This formula yields a nonzero result iff one of the bytes is zero.
20659 This saves three branches inside the loop and many cycles. */
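/* For illustration: the test emitted below computes
   (x - 0x01010101) & ~x & 0x80808080. With x = 0x12003456 (a zero in
   the second byte) this is 0x10ff3355 & 0xedffcba9 & 0x80808080
   == 0x00800000, while x = 0x01010101 (no zero byte) yields 0.  */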
20661 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20662 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20663 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20664 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20665 gen_int_mode (0x80808080, SImode)));
20666 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20671 rtx reg = gen_reg_rtx (SImode);
20672 rtx reg2 = gen_reg_rtx (Pmode);
20673 emit_move_insn (reg, tmpreg);
20674 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20676 /* If zero is not in the first two bytes, move two bytes forward. */
20677 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20678 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20679 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20680 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20681 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20684 /* Emit the lea manually to avoid clobbering the flags. */
20685 emit_insn (gen_rtx_SET (SImode, reg2,
20686 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20688 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20689 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20690 emit_insn (gen_rtx_SET (VOIDmode, out,
20691 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20697 rtx end_2_label = gen_label_rtx ();
20698 /* Is zero in the first two bytes? */
20700 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20701 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20702 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20703 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20704 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20706 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20707 JUMP_LABEL (tmp) = end_2_label;
20709 /* Not in the first two. Move two bytes forward. */
20710 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20711 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20713 emit_label (end_2_label);
20717 /* Avoid branch in fixing the byte. */
20718 tmpreg = gen_lowpart (QImode, tmpreg);
20719 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20720 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20721 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20722 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20724 emit_label (end_0_label);
20727 /* Expand strlen. */
20730 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20732 rtx addr, scratch1, scratch2, scratch3, scratch4;
20734 /* The generic case of the strlen expander is long. Avoid
20735 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
20737 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20738 && !TARGET_INLINE_ALL_STRINGOPS
20739 && !optimize_insn_for_size_p ()
20740 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20743 addr = force_reg (Pmode, XEXP (src, 0));
20744 scratch1 = gen_reg_rtx (Pmode);
20746 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20747 && !optimize_insn_for_size_p ())
20749 /* Well, it seems that some optimizer does not combine a call like
20750 foo(strlen(bar), strlen(bar));
20751 when the move and the subtraction are done here. It does calculate
20752 the length just once when these instructions are done inside of
20753 output_strlen_unroll(). But since &bar[strlen(bar)] is
20754 often used, and this uses one fewer register for the lifetime of
20755 output_strlen_unroll(), this is better. */
20757 emit_move_insn (out, addr);
20759 ix86_expand_strlensi_unroll_1 (out, src, align);
20761 /* strlensi_unroll_1 returns the address of the zero at the end of
20762 the string, like memchr(), so compute the length by subtracting
20763 the start address. */
20764 emit_insn (ix86_gen_sub3 (out, out, addr));
20770 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20771 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20774 scratch2 = gen_reg_rtx (Pmode);
20775 scratch3 = gen_reg_rtx (Pmode);
20776 scratch4 = force_reg (Pmode, constm1_rtx);
20778 emit_move_insn (scratch3, addr);
20779 eoschar = force_reg (QImode, eoschar);
20781 src = replace_equiv_address_nv (src, scratch3);
20783 /* If .md starts supporting :P, this can be done in .md. */
20784 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20785 scratch4), UNSPEC_SCAS);
20786 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20787 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20788 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20793 /* For a given symbol (function), construct code to compute the address of its
20794 PLT entry in the large x86-64 PIC model. */
20796 construct_plt_address (rtx symbol)
20798 rtx tmp = gen_reg_rtx (Pmode);
20799 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20801 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20802 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20804 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20805 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20810 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20812 rtx pop, int sibcall)
20814 rtx use = NULL, call;
20816 if (pop == const0_rtx)
20818 gcc_assert (!TARGET_64BIT || !pop);
20820 if (TARGET_MACHO && !TARGET_64BIT)
20823 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20824 fnaddr = machopic_indirect_call_target (fnaddr);
20829 /* Static functions and indirect calls don't need the pic register. */
20830 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20831 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20832 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20833 use_reg (&use, pic_offset_table_rtx);
20836 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20838 rtx al = gen_rtx_REG (QImode, AX_REG);
20839 emit_move_insn (al, callarg2);
20840 use_reg (&use, al);
20843 if (ix86_cmodel == CM_LARGE_PIC
20845 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20846 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20847 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20849 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20850 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20852 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20853 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20856 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20858 call = gen_rtx_SET (VOIDmode, retval, call);
20861 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20862 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20863 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20866 && ix86_cfun_abi () == MS_ABI
20867 && (!callarg2 || INTVAL (callarg2) != -2))
20869 /* We need to represent that the SI and DI registers are clobbered by SYSV calls. */
20871 static int clobbered_registers[] = {
20872 XMM6_REG, XMM7_REG, XMM8_REG,
20873 XMM9_REG, XMM10_REG, XMM11_REG,
20874 XMM12_REG, XMM13_REG, XMM14_REG,
20875 XMM15_REG, SI_REG, DI_REG
20878 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20879 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20880 UNSPEC_MS_TO_SYSV_CALL);
20884 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20885 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20888 (SSE_REGNO_P (clobbered_registers[i])
20890 clobbered_registers[i]));
20892 call = gen_rtx_PARALLEL (VOIDmode,
20893 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20897 call = emit_call_insn (call);
20899 CALL_INSN_FUNCTION_USAGE (call) = use;
20905 /* Clear stack slot assignments remembered from previous functions.
20906 This is called from INIT_EXPANDERS once before RTL is emitted for each function. */
20909 static struct machine_function *
20910 ix86_init_machine_status (void)
20912 struct machine_function *f;
20914 f = ggc_alloc_cleared_machine_function ();
20915 f->use_fast_prologue_epilogue_nregs = -1;
20916 f->tls_descriptor_call_expanded_p = 0;
20917 f->call_abi = ix86_abi;
20922 /* Return a MEM corresponding to a stack slot with mode MODE.
20923 Allocate a new slot if necessary.
20925 The RTL for a function can have several slots available: N is
20926 which slot to use. */
20929 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20931 struct stack_local_entry *s;
20933 gcc_assert (n < MAX_386_STACK_LOCALS);
20935 /* Virtual slot is valid only before vregs are instantiated. */
20936 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20938 for (s = ix86_stack_locals; s; s = s->next)
20939 if (s->mode == mode && s->n == n)
20940 return copy_rtx (s->rtl);
20942 s = ggc_alloc_stack_local_entry ();
20945 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20947 s->next = ix86_stack_locals;
20948 ix86_stack_locals = s;
20952 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20954 static GTY(()) rtx ix86_tls_symbol;
20956 ix86_tls_get_addr (void)
20959 if (!ix86_tls_symbol)
20961 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20962 (TARGET_ANY_GNU_TLS
20964 ? "___tls_get_addr"
20965 : "__tls_get_addr");
20968 return ix86_tls_symbol;
20971 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20973 static GTY(()) rtx ix86_tls_module_base_symbol;
20975 ix86_tls_module_base (void)
20978 if (!ix86_tls_module_base_symbol)
20980 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20981 "_TLS_MODULE_BASE_");
20982 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20983 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20986 return ix86_tls_module_base_symbol;
20989 /* Calculate the length of the memory address in the instruction
20990 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20993 memory_address_length (rtx addr)
20995 struct ix86_address parts;
20996 rtx base, index, disp;
21000 if (GET_CODE (addr) == PRE_DEC
21001 || GET_CODE (addr) == POST_INC
21002 || GET_CODE (addr) == PRE_MODIFY
21003 || GET_CODE (addr) == POST_MODIFY)
21006 ok = ix86_decompose_address (addr, &parts);
21009 if (parts.base && GET_CODE (parts.base) == SUBREG)
21010 parts.base = SUBREG_REG (parts.base);
21011 if (parts.index && GET_CODE (parts.index) == SUBREG)
21012 parts.index = SUBREG_REG (parts.index);
21015 index = parts.index;
21020 - esp as the base always wants an index,
21021 - ebp as the base always wants a displacement,
21022 - r12 as the base always wants an index,
21023 - r13 as the base always wants a displacement. */
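/* Worked examples of these rules (for illustration only): (%eax) needs
   no extra bytes, (%esp) needs a SIB byte, (%ebp) needs a disp8, and
   4(%eax,%ebx) needs both a SIB byte and a disp8.  */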
21025 /* Register Indirect. */
21026 if (base && !index && !disp)
21028 /* esp (for its index) and ebp (for its displacement) need
21029 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
21032 && (addr == arg_pointer_rtx
21033 || addr == frame_pointer_rtx
21034 || REGNO (addr) == SP_REG
21035 || REGNO (addr) == BP_REG
21036 || REGNO (addr) == R12_REG
21037 || REGNO (addr) == R13_REG))
21041 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
21042 is not disp32, but disp32(%rip), so for disp32
21043 a SIB byte is needed, unless print_operand_address
21044 optimizes it into disp32(%rip) or (%rip) is implied
21046 else if (disp && !base && !index)
21053 if (GET_CODE (disp) == CONST)
21054 symbol = XEXP (disp, 0);
21055 if (GET_CODE (symbol) == PLUS
21056 && CONST_INT_P (XEXP (symbol, 1)))
21057 symbol = XEXP (symbol, 0);
21059 if (GET_CODE (symbol) != LABEL_REF
21060 && (GET_CODE (symbol) != SYMBOL_REF
21061 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
21062 && (GET_CODE (symbol) != UNSPEC
21063 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
21064 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
21071 /* Find the length of the displacement constant. */
21074 if (base && satisfies_constraint_K (disp))
21079 /* ebp always wants a displacement. Similarly r13. */
21080 else if (base && REG_P (base)
21081 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
21084 /* An index requires the two-byte modrm form.... */
21086 /* ...like esp (or r12), which always wants an index. */
21087 || base == arg_pointer_rtx
21088 || base == frame_pointer_rtx
21089 || (base && REG_P (base)
21090 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
21107 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
21108 is set, expect that the insn has an 8-bit immediate alternative. */
21110 ix86_attr_length_immediate_default (rtx insn, int shortform)
21114 extract_insn_cached (insn);
21115 for (i = recog_data.n_operands - 1; i >= 0; --i)
21116 if (CONSTANT_P (recog_data.operand[i]))
21118 enum attr_mode mode = get_attr_mode (insn);
21121 if (shortform && CONST_INT_P (recog_data.operand[i]))
21123 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
21130 ival = trunc_int_for_mode (ival, HImode);
21133 ival = trunc_int_for_mode (ival, SImode);
21138 if (IN_RANGE (ival, -128, 127))
21155 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
21160 fatal_insn ("unknown insn mode", insn);
21165 /* Compute default value for "length_address" attribute. */
21167 ix86_attr_length_address_default (rtx insn)
21171 if (get_attr_type (insn) == TYPE_LEA)
21173 rtx set = PATTERN (insn), addr;
21175 if (GET_CODE (set) == PARALLEL)
21176 set = XVECEXP (set, 0, 0);
21178 gcc_assert (GET_CODE (set) == SET);
21180 addr = SET_SRC (set);
21181 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
21183 if (GET_CODE (addr) == ZERO_EXTEND)
21184 addr = XEXP (addr, 0);
21185 if (GET_CODE (addr) == SUBREG)
21186 addr = SUBREG_REG (addr);
21189 return memory_address_length (addr);
21192 extract_insn_cached (insn);
21193 for (i = recog_data.n_operands - 1; i >= 0; --i)
21194 if (MEM_P (recog_data.operand[i]))
21196 constrain_operands_cached (reload_completed);
21197 if (which_alternative != -1)
21199 const char *constraints = recog_data.constraints[i];
21200 int alt = which_alternative;
21202 while (*constraints == '=' || *constraints == '+')
21205 while (*constraints++ != ',')
21207 /* Skip ignored operands. */
21208 if (*constraints == 'X')
21211 return memory_address_length (XEXP (recog_data.operand[i], 0));
21216 /* Compute the default value for the "length_vex" attribute. It includes
21217 the 2- or 3-byte VEX prefix and 1 opcode byte. */
21220 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
21225 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit uses the
21226 3-byte VEX prefix. */
21227 if (!has_0f_opcode || has_vex_w)
21230 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
21234 extract_insn_cached (insn);
21236 for (i = recog_data.n_operands - 1; i >= 0; --i)
21237 if (REG_P (recog_data.operand[i]))
21239 /* The REX.W bit uses the 3-byte VEX prefix. */
21240 if (GET_MODE (recog_data.operand[i]) == DImode
21241 && GENERAL_REG_P (recog_data.operand[i]))
21246 /* The REX.X or REX.B bits use the 3-byte VEX prefix. */
21247 if (MEM_P (recog_data.operand[i])
21248 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
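/* For illustration: vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte prefix
   (0xc5), while a DImode general register operand (REX.W) or an address
   mentioning %r8-%r15 (REX.X/REX.B) forces the 3-byte prefix (0xc4).  */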
21255 /* Return the maximum number of instructions a cpu can issue. */
21258 ix86_issue_rate (void)
21262 case PROCESSOR_PENTIUM:
21263 case PROCESSOR_ATOM:
21267 case PROCESSOR_PENTIUMPRO:
21268 case PROCESSOR_PENTIUM4:
21269 case PROCESSOR_ATHLON:
21271 case PROCESSOR_AMDFAM10:
21272 case PROCESSOR_NOCONA:
21273 case PROCESSOR_GENERIC32:
21274 case PROCESSOR_GENERIC64:
21275 case PROCESSOR_BDVER1:
21278 case PROCESSOR_CORE2:
21286 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
21287 by DEP_INSN and nothing else set by DEP_INSN. */
21290 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
21294 /* Simplify the test for uninteresting insns. */
21295 if (insn_type != TYPE_SETCC
21296 && insn_type != TYPE_ICMOV
21297 && insn_type != TYPE_FCMOV
21298 && insn_type != TYPE_IBR)
21301 if ((set = single_set (dep_insn)) != 0)
21303 set = SET_DEST (set);
21306 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
21307 && XVECLEN (PATTERN (dep_insn), 0) == 2
21308 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
21309 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
21311 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
21312 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
21317 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
21320 /* This test is true if the dependent insn reads the flags but
21321 not any other potentially set register. */
21322 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
21325 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
21331 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN. */
21335 ix86_agi_dependent (rtx set_insn, rtx use_insn)
21338 extract_insn_cached (use_insn);
21339 for (i = recog_data.n_operands - 1; i >= 0; --i)
21340 if (MEM_P (recog_data.operand[i]))
21342 rtx addr = XEXP (recog_data.operand[i], 0);
21343 return modified_in_p (addr, set_insn) != 0;
21349 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
21351 enum attr_type insn_type, dep_insn_type;
21352 enum attr_memory memory;
21354 int dep_insn_code_number;
21356 /* Anti and output dependencies have zero cost on all CPUs. */
21357 if (REG_NOTE_KIND (link) != 0)
21360 dep_insn_code_number = recog_memoized (dep_insn);
21362 /* If we can't recognize the insns, we can't really do anything. */
21363 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
21366 insn_type = get_attr_type (insn);
21367 dep_insn_type = get_attr_type (dep_insn);
21371 case PROCESSOR_PENTIUM:
21372 /* Address Generation Interlock adds a cycle of latency. */
21373 if (insn_type == TYPE_LEA)
21375 rtx addr = PATTERN (insn);
21377 if (GET_CODE (addr) == PARALLEL)
21378 addr = XVECEXP (addr, 0, 0);
21380 gcc_assert (GET_CODE (addr) == SET);
21382 addr = SET_SRC (addr);
21383 if (modified_in_p (addr, dep_insn))
21386 else if (ix86_agi_dependent (dep_insn, insn))
21389 /* ??? Compares pair with jump/setcc. */
21390 if (ix86_flags_dependent (insn, dep_insn, insn_type))
21393 /* Floating point stores require value to be ready one cycle earlier. */
21394 if (insn_type == TYPE_FMOV
21395 && get_attr_memory (insn) == MEMORY_STORE
21396 && !ix86_agi_dependent (dep_insn, insn))
21400 case PROCESSOR_PENTIUMPRO:
21401 memory = get_attr_memory (insn);
21403 /* INT->FP conversion is expensive. */
21404 if (get_attr_fp_int_src (dep_insn))
21407 /* There is one cycle extra latency between an FP op and a store. */
21408 if (insn_type == TYPE_FMOV
21409 && (set = single_set (dep_insn)) != NULL_RTX
21410 && (set2 = single_set (insn)) != NULL_RTX
21411 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
21412 && MEM_P (SET_DEST (set2)))
21415 /* Show the ability of the reorder buffer to hide the latency of a load
21416 by executing it in parallel with the previous instruction in case the
21417 previous instruction is not needed to compute the address. */
21418 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21419 && !ix86_agi_dependent (dep_insn, insn))
21421 /* Claim moves to take one cycle, as the core can issue one load
21422 at a time and the next load can start a cycle later. */
21423 if (dep_insn_type == TYPE_IMOV
21424 || dep_insn_type == TYPE_FMOV)
21432 memory = get_attr_memory (insn);
21434 /* The esp dependency is resolved before the instruction is really finished. */
21436 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
21437 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
21440 /* INT->FP conversion is expensive. */
21441 if (get_attr_fp_int_src (dep_insn))
21444 /* Show the ability of the reorder buffer to hide the latency of a load
21445 by executing it in parallel with the previous instruction in case the
21446 previous instruction is not needed to compute the address. */
21447 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21448 && !ix86_agi_dependent (dep_insn, insn))
21450 /* Claim moves to take one cycle, as the core can issue one load
21451 at a time and the next load can start a cycle later. */
21452 if (dep_insn_type == TYPE_IMOV
21453 || dep_insn_type == TYPE_FMOV)
21462 case PROCESSOR_ATHLON:
21464 case PROCESSOR_AMDFAM10:
21465 case PROCESSOR_BDVER1:
21466 case PROCESSOR_ATOM:
21467 case PROCESSOR_GENERIC32:
21468 case PROCESSOR_GENERIC64:
21469 memory = get_attr_memory (insn);
21471 /* Show the ability of the reorder buffer to hide the latency of a load
21472 by executing it in parallel with the previous instruction in case the
21473 previous instruction is not needed to compute the address. */
21474 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
21475 && !ix86_agi_dependent (dep_insn, insn))
21477 enum attr_unit unit = get_attr_unit (insn);
21480 /* Because of the difference between the length of the integer and
21481 floating-point unit pipeline preparation stages, the memory operands
21482 for floating point are cheaper.
21484 ??? For Athlon the difference is most probably 2. */
21485 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
21488 loadcost = TARGET_ATHLON ? 2 : 0;
21490 if (cost >= loadcost)
21503 /* How many alternative schedules to try. This should be as wide as the
21504 scheduling freedom in the DFA, but no wider. Making this value too
21505 large results in extra work for the scheduler. */
21508 ia32_multipass_dfa_lookahead (void)
21512 case PROCESSOR_PENTIUM:
21515 case PROCESSOR_PENTIUMPRO:
21525 /* Compute the alignment given to a constant that is being placed in memory.
21526 EXP is the constant and ALIGN is the alignment that the object would
21528 The value of this function is used instead of that alignment to align
21532 ix86_constant_alignment (tree exp, int align)
21534 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
21535 || TREE_CODE (exp) == INTEGER_CST)
21537 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
21539 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
21542 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
21543 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
21544 return BITS_PER_WORD;
21549 /* Compute the alignment for a static variable.
21550 TYPE is the data type, and ALIGN is the alignment that
21551 the object would ordinarily have. The value of this function is used
21552 instead of that alignment to align the object. */
21555 ix86_data_alignment (tree type, int align)
21557 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
21559 if (AGGREGATE_TYPE_P (type)
21560 && TYPE_SIZE (type)
21561 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21562 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
21563 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
21564 && align < max_align)
21567 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21568 to a 16-byte boundary. */
21571 if (AGGREGATE_TYPE_P (type)
21572 && TYPE_SIZE (type)
21573 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21574 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
21575 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21579 if (TREE_CODE (type) == ARRAY_TYPE)
21581 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21583 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21586 else if (TREE_CODE (type) == COMPLEX_TYPE)
21589 if (TYPE_MODE (type) == DCmode && align < 64)
21591 if ((TYPE_MODE (type) == XCmode
21592 || TYPE_MODE (type) == TCmode) && align < 128)
21595 else if ((TREE_CODE (type) == RECORD_TYPE
21596 || TREE_CODE (type) == UNION_TYPE
21597 || TREE_CODE (type) == QUAL_UNION_TYPE)
21598 && TYPE_FIELDS (type))
21600 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21602 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21605 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21606 || TREE_CODE (type) == INTEGER_TYPE)
21608 if (TYPE_MODE (type) == DFmode && align < 64)
21610 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21617 /* Compute the alignment for a local variable or a stack slot. EXP is
21618 the data type or decl itself, MODE is the widest mode available and
21619 ALIGN is the alignment that the object would ordinarily have. The
21620 value of this function is used instead of that alignment to align the
21624 ix86_local_alignment (tree exp, enum machine_mode mode,
21625 unsigned int align)
21629 if (exp && DECL_P (exp))
21631 type = TREE_TYPE (exp);
21640 /* Don't do dynamic stack realignment for long long objects with
21641 -mpreferred-stack-boundary=2. */
21644 && ix86_preferred_stack_boundary < 64
21645 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21646 && (!type || !TYPE_USER_ALIGN (type))
21647 && (!decl || !DECL_USER_ALIGN (decl)))
21650 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
21651 register in MODE. We will return the largest alignment of XF and DF. */
21655 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21656 align = GET_MODE_ALIGNMENT (DFmode);
21660 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21661 to a 16-byte boundary. The exact wording is:
21663 An array uses the same alignment as its elements, except that a local or
21664 global array variable of length at least 16 bytes or
21665 a C99 variable-length array variable always has alignment of at least 16 bytes.
21667 This was added to allow use of aligned SSE instructions on arrays. This
21668 rule is meant for static storage (where the compiler cannot do the analysis
21669 by itself). We follow it for automatic variables only when convenient.
21670 We fully control everything in the function being compiled, and functions
21671 from other units cannot rely on the alignment.
21673 Exclude the va_list type. It is the common case of a local array where
21674 we cannot benefit from the alignment. */
21675 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21678 if (AGGREGATE_TYPE_P (type)
21679 && (TYPE_MAIN_VARIANT (type)
21680 != TYPE_MAIN_VARIANT (va_list_type_node))
21681 && TYPE_SIZE (type)
21682 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21683 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21684 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21687 if (TREE_CODE (type) == ARRAY_TYPE)
21689 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21691 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21694 else if (TREE_CODE (type) == COMPLEX_TYPE)
21696 if (TYPE_MODE (type) == DCmode && align < 64)
21698 if ((TYPE_MODE (type) == XCmode
21699 || TYPE_MODE (type) == TCmode) && align < 128)
21702 else if ((TREE_CODE (type) == RECORD_TYPE
21703 || TREE_CODE (type) == UNION_TYPE
21704 || TREE_CODE (type) == QUAL_UNION_TYPE)
21705 && TYPE_FIELDS (type))
21707 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21709 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21712 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21713 || TREE_CODE (type) == INTEGER_TYPE)
21716 if (TYPE_MODE (type) == DFmode && align < 64)
21718 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21724 /* Compute the minimum required alignment for dynamic stack realignment
21725 purposes for a local variable, parameter or a stack slot. EXP is
21726 the data type or decl itself, MODE is its mode and ALIGN is the
21727 alignment that the object would ordinarily have. */
21730 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21731 unsigned int align)
21735 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21738 if (exp && DECL_P (exp))
21740 type = TREE_TYPE (exp);
21749 /* Don't do dynamic stack realignment for long long objects with
21750 -mpreferred-stack-boundary=2. */
21751 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21752 && (!type || !TYPE_USER_ALIGN (type))
21753 && (!decl || !DECL_USER_ALIGN (decl)))
21759 /* Find a location for the static chain incoming to a nested function.
21760 This is a register, unless all free registers are used by arguments. */
21763 ix86_static_chain (const_tree fndecl, bool incoming_p)
21767 if (!DECL_STATIC_CHAIN (fndecl))
21772 /* We always use R10 in 64-bit mode. */
21778 /* By default in 32-bit mode we use ECX to pass the static chain. */
21781 fntype = TREE_TYPE (fndecl);
21782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21784 /* Fastcall functions use ecx/edx for arguments, which leaves
21785 us with EAX for the static chain. */
21788 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21790 /* Thiscall functions use ecx for arguments, which leaves
21791 us with EAX for the static chain. */
21794 else if (ix86_function_regparm (fntype, fndecl) == 3)
21796 /* For regparm 3, we have no free call-clobbered registers in
21797 which to store the static chain. In order to implement this,
21798 we have the trampoline push the static chain to the stack.
21799 However, we can't push a value below the return address when
21800 we call the nested function directly, so we have to use an
21801 alternate entry point. For this we use ESI, and have the
21802 alternate entry point push ESI, so that things appear the
21803 same once we're executing the nested function. */
21806 if (fndecl == current_function_decl)
21807 ix86_static_chain_on_stack = true;
21808 return gen_frame_mem (SImode,
21809 plus_constant (arg_pointer_rtx, -8));
21815 return gen_rtx_REG (Pmode, regno);
21818 /* Emit RTL insns to initialize the variable parts of a trampoline.
21819 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21820 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21821 to be passed to the target function. */
21824 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21828 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21835 /* Depending on the static chain location, either load a register
21836 with a constant, or push the constant to the stack. All of the
21837 instructions are the same size. */
21838 chain = ix86_static_chain (fndecl, true);
21841 if (REGNO (chain) == CX_REG)
21843 else if (REGNO (chain) == AX_REG)
21846 gcc_unreachable ();
21851 mem = adjust_address (m_tramp, QImode, 0);
21852 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21854 mem = adjust_address (m_tramp, SImode, 1);
21855 emit_move_insn (mem, chain_value);
21857 /* Compute offset from the end of the jmp to the target function.
21858 In the case in which the trampoline stores the static chain on
21859 the stack, we need to skip the first insn which pushes the
21860 (call-saved) register static chain; this push is 1 byte. */
21861 disp = expand_binop (SImode, sub_optab, fnaddr,
21862 plus_constant (XEXP (m_tramp, 0),
21863 MEM_P (chain) ? 9 : 10),
21864 NULL_RTX, 1, OPTAB_DIRECT);
21866 mem = adjust_address (m_tramp, QImode, 5);
21867 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21869 mem = adjust_address (m_tramp, SImode, 6);
21870 emit_move_insn (mem, disp);
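/* For illustration, the 10-byte ia32 trampoline written above is laid
   out as: offset 0, the mov opcode (0xb9 for %ecx, 0xb8 for %eax, or a
   1-byte push-immediate opcode when the chain lives on the stack);
   offsets 1-4, the static chain value; offset 5, the 0xe9 jmp opcode;
   offsets 6-9, the rel32 displacement computed above.  */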
21876 /* Load the function address into r11. Try to load the address using
21877 the shorter movl instead of movabs. We may want to support
21878 movq for kernel mode, but the kernel does not use trampolines at the moment. */
21880 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21882 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21884 mem = adjust_address (m_tramp, HImode, offset);
21885 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21887 mem = adjust_address (m_tramp, SImode, offset + 2);
21888 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21893 mem = adjust_address (m_tramp, HImode, offset);
21894 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21896 mem = adjust_address (m_tramp, DImode, offset + 2);
21897 emit_move_insn (mem, fnaddr);
21901 /* Load static chain using movabs to r10. */
21902 mem = adjust_address (m_tramp, HImode, offset);
21903 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21905 mem = adjust_address (m_tramp, DImode, offset + 2);
21906 emit_move_insn (mem, chain_value);
21909 /* Jump to r11; the last (unused) byte is a nop, only there to
21910 pad the write out to a single 32-bit store. */
21911 mem = adjust_address (m_tramp, SImode, offset);
21912 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
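/* For illustration, the stores above write these little-endian bytes:
   41 bb imm32 (movl $fnaddr, %r11d, short form) or 49 bb imm64
   (movabs $fnaddr, %r11); 49 ba imm64 (movabs $chain, %r10);
   49 ff e3 90 (jmp *%r11 plus the padding nop).  */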
21915 gcc_assert (offset <= TRAMPOLINE_SIZE);
21918 #ifdef ENABLE_EXECUTE_STACK
21919 #ifdef CHECK_EXECUTE_STACK_ENABLED
21920 if (CHECK_EXECUTE_STACK_ENABLED)
21922 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21923 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21927 /* The following file contains several enumerations and data structures
21928 built from the definitions in i386-builtin-types.def. */
21930 #include "i386-builtin-types.inc"
21932 /* Table for the ix86 builtin non-function types. */
21933 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21935 /* Retrieve an element from the above table, building some of
21936 the types lazily. */
21939 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21941 unsigned int index;
21944 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21946 type = ix86_builtin_type_tab[(int) tcode];
21950 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21951 if (tcode <= IX86_BT_LAST_VECT)
21953 enum machine_mode mode;
21955 index = tcode - IX86_BT_LAST_PRIM - 1;
21956 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21957 mode = ix86_builtin_type_vect_mode[index];
21959 type = build_vector_type_for_mode (itype, mode);
21965 index = tcode - IX86_BT_LAST_VECT - 1;
21966 if (tcode <= IX86_BT_LAST_PTR)
21967 quals = TYPE_UNQUALIFIED;
21969 quals = TYPE_QUAL_CONST;
21971 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21972 if (quals != TYPE_UNQUALIFIED)
21973 itype = build_qualified_type (itype, quals);
21975 type = build_pointer_type (itype);
21978 ix86_builtin_type_tab[(int) tcode] = type;
21982 /* Table for the ix86 builtin function types. */
21983 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21985 /* Retrieve an element from the above table, building some of
21986 the types lazily. */

static tree
ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
{
  tree type;

  gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));

  type = ix86_builtin_func_type_tab[(int) tcode];
  if (type != NULL)
    return type;

  if (tcode <= IX86_BT_LAST_FUNC)
    {
      unsigned start = ix86_builtin_func_start[(int) tcode];
      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
      tree rtype, atype, args = void_list_node;
      unsigned i;

      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
      for (i = after - 1; i > start; --i)
        {
          atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
          args = tree_cons (NULL, atype, args);
        }
      type = build_function_type (rtype, args);
    }
  else
    {
      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
      enum ix86_builtin_func_type icode;

      icode = ix86_builtin_func_alias_base[index];
      type = ix86_get_builtin_func_type (icode);
    }

  ix86_builtin_func_type_tab[(int) tcode] = type;
  return type;
}
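
/* E.g. a code such as V4SF_FTYPE_V4SF_V4SF (used in the bdesc_* tables
   below) becomes "v4sf (v4sf, v4sf)"; the argument TREE_LIST is built
   back to front so that it ends in void_list_node and therefore forms
   a proper prototype rather than a varargs type.  */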

/* Codes for all the SSE/MMX builtins.  */
enum ix86_builtins
{
  IX86_BUILTIN_ADDPS,
  IX86_BUILTIN_ADDSS,
  IX86_BUILTIN_DIVPS,
  IX86_BUILTIN_DIVSS,
  IX86_BUILTIN_MULPS,
  IX86_BUILTIN_MULSS,
  IX86_BUILTIN_SUBPS,
  IX86_BUILTIN_SUBSS,

  IX86_BUILTIN_CMPEQPS,
  IX86_BUILTIN_CMPLTPS,
  IX86_BUILTIN_CMPLEPS,
  IX86_BUILTIN_CMPGTPS,
  IX86_BUILTIN_CMPGEPS,
  IX86_BUILTIN_CMPNEQPS,
  IX86_BUILTIN_CMPNLTPS,
  IX86_BUILTIN_CMPNLEPS,
  IX86_BUILTIN_CMPNGTPS,
  IX86_BUILTIN_CMPNGEPS,
  IX86_BUILTIN_CMPORDPS,
  IX86_BUILTIN_CMPUNORDPS,
  IX86_BUILTIN_CMPEQSS,
  IX86_BUILTIN_CMPLTSS,
  IX86_BUILTIN_CMPLESS,
  IX86_BUILTIN_CMPNEQSS,
  IX86_BUILTIN_CMPNLTSS,
  IX86_BUILTIN_CMPNLESS,
  IX86_BUILTIN_CMPNGTSS,
  IX86_BUILTIN_CMPNGESS,
  IX86_BUILTIN_CMPORDSS,
  IX86_BUILTIN_CMPUNORDSS,

  IX86_BUILTIN_COMIEQSS,
  IX86_BUILTIN_COMILTSS,
  IX86_BUILTIN_COMILESS,
  IX86_BUILTIN_COMIGTSS,
  IX86_BUILTIN_COMIGESS,
  IX86_BUILTIN_COMINEQSS,
  IX86_BUILTIN_UCOMIEQSS,
  IX86_BUILTIN_UCOMILTSS,
  IX86_BUILTIN_UCOMILESS,
  IX86_BUILTIN_UCOMIGTSS,
  IX86_BUILTIN_UCOMIGESS,
  IX86_BUILTIN_UCOMINEQSS,

  IX86_BUILTIN_CVTPI2PS,
  IX86_BUILTIN_CVTPS2PI,
  IX86_BUILTIN_CVTSI2SS,
  IX86_BUILTIN_CVTSI642SS,
  IX86_BUILTIN_CVTSS2SI,
  IX86_BUILTIN_CVTSS2SI64,
  IX86_BUILTIN_CVTTPS2PI,
  IX86_BUILTIN_CVTTSS2SI,
  IX86_BUILTIN_CVTTSS2SI64,

  IX86_BUILTIN_MAXPS,
  IX86_BUILTIN_MAXSS,
  IX86_BUILTIN_MINPS,
  IX86_BUILTIN_MINSS,

  IX86_BUILTIN_LOADUPS,
  IX86_BUILTIN_STOREUPS,
  IX86_BUILTIN_MOVSS,

  IX86_BUILTIN_MOVHLPS,
  IX86_BUILTIN_MOVLHPS,
  IX86_BUILTIN_LOADHPS,
  IX86_BUILTIN_LOADLPS,
  IX86_BUILTIN_STOREHPS,
  IX86_BUILTIN_STORELPS,

  IX86_BUILTIN_MASKMOVQ,
  IX86_BUILTIN_MOVMSKPS,
  IX86_BUILTIN_PMOVMSKB,

  IX86_BUILTIN_MOVNTPS,
  IX86_BUILTIN_MOVNTQ,

  IX86_BUILTIN_LOADDQU,
  IX86_BUILTIN_STOREDQU,

  IX86_BUILTIN_PACKSSWB,
  IX86_BUILTIN_PACKSSDW,
  IX86_BUILTIN_PACKUSWB,

  IX86_BUILTIN_PADDB,
  IX86_BUILTIN_PADDW,
  IX86_BUILTIN_PADDD,
  IX86_BUILTIN_PADDQ,
  IX86_BUILTIN_PADDSB,
  IX86_BUILTIN_PADDSW,
  IX86_BUILTIN_PADDUSB,
  IX86_BUILTIN_PADDUSW,
  IX86_BUILTIN_PSUBB,
  IX86_BUILTIN_PSUBW,
  IX86_BUILTIN_PSUBD,
  IX86_BUILTIN_PSUBQ,
  IX86_BUILTIN_PSUBSB,
  IX86_BUILTIN_PSUBSW,
  IX86_BUILTIN_PSUBUSB,
  IX86_BUILTIN_PSUBUSW,

  IX86_BUILTIN_PAND,
  IX86_BUILTIN_PANDN,
  IX86_BUILTIN_POR,
  IX86_BUILTIN_PXOR,

  IX86_BUILTIN_PAVGB,
  IX86_BUILTIN_PAVGW,

  IX86_BUILTIN_PCMPEQB,
  IX86_BUILTIN_PCMPEQW,
  IX86_BUILTIN_PCMPEQD,
  IX86_BUILTIN_PCMPGTB,
  IX86_BUILTIN_PCMPGTW,
  IX86_BUILTIN_PCMPGTD,

  IX86_BUILTIN_PMADDWD,

  IX86_BUILTIN_PMAXSW,
  IX86_BUILTIN_PMAXUB,
  IX86_BUILTIN_PMINSW,
  IX86_BUILTIN_PMINUB,

  IX86_BUILTIN_PMULHUW,
  IX86_BUILTIN_PMULHW,
  IX86_BUILTIN_PMULLW,

  IX86_BUILTIN_PSADBW,
  IX86_BUILTIN_PSHUFW,

  IX86_BUILTIN_PSLLW,
  IX86_BUILTIN_PSLLD,
  IX86_BUILTIN_PSLLQ,
  IX86_BUILTIN_PSRAW,
  IX86_BUILTIN_PSRAD,
  IX86_BUILTIN_PSRLW,
  IX86_BUILTIN_PSRLD,
  IX86_BUILTIN_PSRLQ,
  IX86_BUILTIN_PSLLWI,
  IX86_BUILTIN_PSLLDI,
  IX86_BUILTIN_PSLLQI,
  IX86_BUILTIN_PSRAWI,
  IX86_BUILTIN_PSRADI,
  IX86_BUILTIN_PSRLWI,
  IX86_BUILTIN_PSRLDI,
  IX86_BUILTIN_PSRLQI,

  IX86_BUILTIN_PUNPCKHBW,
  IX86_BUILTIN_PUNPCKHWD,
  IX86_BUILTIN_PUNPCKHDQ,
  IX86_BUILTIN_PUNPCKLBW,
  IX86_BUILTIN_PUNPCKLWD,
  IX86_BUILTIN_PUNPCKLDQ,

  IX86_BUILTIN_SHUFPS,

  IX86_BUILTIN_RCPPS,
  IX86_BUILTIN_RCPSS,
  IX86_BUILTIN_RSQRTPS,
  IX86_BUILTIN_RSQRTPS_NR,
  IX86_BUILTIN_RSQRTSS,
  IX86_BUILTIN_RSQRTF,
  IX86_BUILTIN_SQRTPS,
  IX86_BUILTIN_SQRTPS_NR,
  IX86_BUILTIN_SQRTSS,

  IX86_BUILTIN_UNPCKHPS,
  IX86_BUILTIN_UNPCKLPS,

  IX86_BUILTIN_ANDPS,
  IX86_BUILTIN_ANDNPS,
  IX86_BUILTIN_ORPS,
  IX86_BUILTIN_XORPS,

  IX86_BUILTIN_LDMXCSR,
  IX86_BUILTIN_STMXCSR,
  IX86_BUILTIN_SFENCE,

  /* 3DNow! Original */
  IX86_BUILTIN_FEMMS,
  IX86_BUILTIN_PAVGUSB,
  IX86_BUILTIN_PF2ID,
  IX86_BUILTIN_PFACC,
  IX86_BUILTIN_PFADD,
  IX86_BUILTIN_PFCMPEQ,
  IX86_BUILTIN_PFCMPGE,
  IX86_BUILTIN_PFCMPGT,
  IX86_BUILTIN_PFMAX,
  IX86_BUILTIN_PFMIN,
  IX86_BUILTIN_PFMUL,
  IX86_BUILTIN_PFRCP,
  IX86_BUILTIN_PFRCPIT1,
  IX86_BUILTIN_PFRCPIT2,
  IX86_BUILTIN_PFRSQIT1,
  IX86_BUILTIN_PFRSQRT,
  IX86_BUILTIN_PFSUB,
  IX86_BUILTIN_PFSUBR,
  IX86_BUILTIN_PI2FD,
  IX86_BUILTIN_PMULHRW,

  /* 3DNow! Athlon Extensions */
  IX86_BUILTIN_PF2IW,
  IX86_BUILTIN_PFNACC,
  IX86_BUILTIN_PFPNACC,
  IX86_BUILTIN_PI2FW,
  IX86_BUILTIN_PSWAPDSI,
  IX86_BUILTIN_PSWAPDSF,

  /* SSE2 */
  IX86_BUILTIN_ADDPD,
  IX86_BUILTIN_ADDSD,
  IX86_BUILTIN_DIVPD,
  IX86_BUILTIN_DIVSD,
  IX86_BUILTIN_MULPD,
  IX86_BUILTIN_MULSD,
  IX86_BUILTIN_SUBPD,
  IX86_BUILTIN_SUBSD,

  IX86_BUILTIN_CMPEQPD,
  IX86_BUILTIN_CMPLTPD,
  IX86_BUILTIN_CMPLEPD,
  IX86_BUILTIN_CMPGTPD,
  IX86_BUILTIN_CMPGEPD,
  IX86_BUILTIN_CMPNEQPD,
  IX86_BUILTIN_CMPNLTPD,
  IX86_BUILTIN_CMPNLEPD,
  IX86_BUILTIN_CMPNGTPD,
  IX86_BUILTIN_CMPNGEPD,
  IX86_BUILTIN_CMPORDPD,
  IX86_BUILTIN_CMPUNORDPD,
  IX86_BUILTIN_CMPEQSD,
  IX86_BUILTIN_CMPLTSD,
  IX86_BUILTIN_CMPLESD,
  IX86_BUILTIN_CMPNEQSD,
  IX86_BUILTIN_CMPNLTSD,
  IX86_BUILTIN_CMPNLESD,
  IX86_BUILTIN_CMPORDSD,
  IX86_BUILTIN_CMPUNORDSD,

  IX86_BUILTIN_COMIEQSD,
  IX86_BUILTIN_COMILTSD,
  IX86_BUILTIN_COMILESD,
  IX86_BUILTIN_COMIGTSD,
  IX86_BUILTIN_COMIGESD,
  IX86_BUILTIN_COMINEQSD,
  IX86_BUILTIN_UCOMIEQSD,
  IX86_BUILTIN_UCOMILTSD,
  IX86_BUILTIN_UCOMILESD,
  IX86_BUILTIN_UCOMIGTSD,
  IX86_BUILTIN_UCOMIGESD,
  IX86_BUILTIN_UCOMINEQSD,

  IX86_BUILTIN_MAXPD,
  IX86_BUILTIN_MAXSD,
  IX86_BUILTIN_MINPD,
  IX86_BUILTIN_MINSD,

  IX86_BUILTIN_ANDPD,
  IX86_BUILTIN_ANDNPD,
  IX86_BUILTIN_ORPD,
  IX86_BUILTIN_XORPD,

  IX86_BUILTIN_SQRTPD,
  IX86_BUILTIN_SQRTSD,

  IX86_BUILTIN_UNPCKHPD,
  IX86_BUILTIN_UNPCKLPD,

  IX86_BUILTIN_SHUFPD,

  IX86_BUILTIN_LOADUPD,
  IX86_BUILTIN_STOREUPD,
  IX86_BUILTIN_MOVSD,

  IX86_BUILTIN_LOADHPD,
  IX86_BUILTIN_LOADLPD,

  IX86_BUILTIN_CVTDQ2PD,
  IX86_BUILTIN_CVTDQ2PS,

  IX86_BUILTIN_CVTPD2DQ,
  IX86_BUILTIN_CVTPD2PI,
  IX86_BUILTIN_CVTPD2PS,
  IX86_BUILTIN_CVTTPD2DQ,
  IX86_BUILTIN_CVTTPD2PI,

  IX86_BUILTIN_CVTPI2PD,
  IX86_BUILTIN_CVTSI2SD,
  IX86_BUILTIN_CVTSI642SD,

  IX86_BUILTIN_CVTSD2SI,
  IX86_BUILTIN_CVTSD2SI64,
  IX86_BUILTIN_CVTSD2SS,
  IX86_BUILTIN_CVTSS2SD,
  IX86_BUILTIN_CVTTSD2SI,
  IX86_BUILTIN_CVTTSD2SI64,

  IX86_BUILTIN_CVTPS2DQ,
  IX86_BUILTIN_CVTPS2PD,
  IX86_BUILTIN_CVTTPS2DQ,

  IX86_BUILTIN_MOVNTI,
  IX86_BUILTIN_MOVNTPD,
  IX86_BUILTIN_MOVNTDQ,

  IX86_BUILTIN_MOVQ128,

  IX86_BUILTIN_MASKMOVDQU,
  IX86_BUILTIN_MOVMSKPD,
  IX86_BUILTIN_PMOVMSKB128,

  IX86_BUILTIN_PACKSSWB128,
  IX86_BUILTIN_PACKSSDW128,
  IX86_BUILTIN_PACKUSWB128,

  IX86_BUILTIN_PADDB128,
  IX86_BUILTIN_PADDW128,
  IX86_BUILTIN_PADDD128,
  IX86_BUILTIN_PADDQ128,
  IX86_BUILTIN_PADDSB128,
  IX86_BUILTIN_PADDSW128,
  IX86_BUILTIN_PADDUSB128,
  IX86_BUILTIN_PADDUSW128,
  IX86_BUILTIN_PSUBB128,
  IX86_BUILTIN_PSUBW128,
  IX86_BUILTIN_PSUBD128,
  IX86_BUILTIN_PSUBQ128,
  IX86_BUILTIN_PSUBSB128,
  IX86_BUILTIN_PSUBSW128,
  IX86_BUILTIN_PSUBUSB128,
  IX86_BUILTIN_PSUBUSW128,

  IX86_BUILTIN_PAND128,
  IX86_BUILTIN_PANDN128,
  IX86_BUILTIN_POR128,
  IX86_BUILTIN_PXOR128,

  IX86_BUILTIN_PAVGB128,
  IX86_BUILTIN_PAVGW128,

  IX86_BUILTIN_PCMPEQB128,
  IX86_BUILTIN_PCMPEQW128,
  IX86_BUILTIN_PCMPEQD128,
  IX86_BUILTIN_PCMPGTB128,
  IX86_BUILTIN_PCMPGTW128,
  IX86_BUILTIN_PCMPGTD128,

  IX86_BUILTIN_PMADDWD128,

  IX86_BUILTIN_PMAXSW128,
  IX86_BUILTIN_PMAXUB128,
  IX86_BUILTIN_PMINSW128,
  IX86_BUILTIN_PMINUB128,

  IX86_BUILTIN_PMULUDQ,
  IX86_BUILTIN_PMULUDQ128,
  IX86_BUILTIN_PMULHUW128,
  IX86_BUILTIN_PMULHW128,
  IX86_BUILTIN_PMULLW128,

  IX86_BUILTIN_PSADBW128,
  IX86_BUILTIN_PSHUFHW,
  IX86_BUILTIN_PSHUFLW,
  IX86_BUILTIN_PSHUFD,

  IX86_BUILTIN_PSLLDQI128,
  IX86_BUILTIN_PSLLWI128,
  IX86_BUILTIN_PSLLDI128,
  IX86_BUILTIN_PSLLQI128,
  IX86_BUILTIN_PSRAWI128,
  IX86_BUILTIN_PSRADI128,
  IX86_BUILTIN_PSRLDQI128,
  IX86_BUILTIN_PSRLWI128,
  IX86_BUILTIN_PSRLDI128,
  IX86_BUILTIN_PSRLQI128,

  IX86_BUILTIN_PSLLDQ128,
  IX86_BUILTIN_PSLLW128,
  IX86_BUILTIN_PSLLD128,
  IX86_BUILTIN_PSLLQ128,
  IX86_BUILTIN_PSRAW128,
  IX86_BUILTIN_PSRAD128,
  IX86_BUILTIN_PSRLW128,
  IX86_BUILTIN_PSRLD128,
  IX86_BUILTIN_PSRLQ128,

  IX86_BUILTIN_PUNPCKHBW128,
  IX86_BUILTIN_PUNPCKHWD128,
  IX86_BUILTIN_PUNPCKHDQ128,
  IX86_BUILTIN_PUNPCKHQDQ128,
  IX86_BUILTIN_PUNPCKLBW128,
  IX86_BUILTIN_PUNPCKLWD128,
  IX86_BUILTIN_PUNPCKLDQ128,
  IX86_BUILTIN_PUNPCKLQDQ128,

  IX86_BUILTIN_CLFLUSH,
  IX86_BUILTIN_MFENCE,
  IX86_BUILTIN_LFENCE,

  IX86_BUILTIN_BSRSI,
  IX86_BUILTIN_BSRDI,
  IX86_BUILTIN_RDPMC,
  IX86_BUILTIN_RDTSC,
  IX86_BUILTIN_RDTSCP,
  IX86_BUILTIN_ROLQI,
  IX86_BUILTIN_ROLHI,
  IX86_BUILTIN_RORQI,
  IX86_BUILTIN_RORHI,

  /* SSE3 */
  IX86_BUILTIN_ADDSUBPS,
  IX86_BUILTIN_HADDPS,
  IX86_BUILTIN_HSUBPS,
  IX86_BUILTIN_MOVSHDUP,
  IX86_BUILTIN_MOVSLDUP,
  IX86_BUILTIN_ADDSUBPD,
  IX86_BUILTIN_HADDPD,
  IX86_BUILTIN_HSUBPD,
  IX86_BUILTIN_LDDQU,

  IX86_BUILTIN_MONITOR,
  IX86_BUILTIN_MWAIT,

  /* SSSE3 */
  IX86_BUILTIN_PHADDW,
  IX86_BUILTIN_PHADDD,
  IX86_BUILTIN_PHADDSW,
  IX86_BUILTIN_PHSUBW,
  IX86_BUILTIN_PHSUBD,
  IX86_BUILTIN_PHSUBSW,
  IX86_BUILTIN_PMADDUBSW,
  IX86_BUILTIN_PMULHRSW,
  IX86_BUILTIN_PSHUFB,
  IX86_BUILTIN_PSIGNB,
  IX86_BUILTIN_PSIGNW,
  IX86_BUILTIN_PSIGND,
  IX86_BUILTIN_PALIGNR,
  IX86_BUILTIN_PABSB,
  IX86_BUILTIN_PABSW,
  IX86_BUILTIN_PABSD,

  IX86_BUILTIN_PHADDW128,
  IX86_BUILTIN_PHADDD128,
  IX86_BUILTIN_PHADDSW128,
  IX86_BUILTIN_PHSUBW128,
  IX86_BUILTIN_PHSUBD128,
  IX86_BUILTIN_PHSUBSW128,
  IX86_BUILTIN_PMADDUBSW128,
  IX86_BUILTIN_PMULHRSW128,
  IX86_BUILTIN_PSHUFB128,
  IX86_BUILTIN_PSIGNB128,
  IX86_BUILTIN_PSIGNW128,
  IX86_BUILTIN_PSIGND128,
  IX86_BUILTIN_PALIGNR128,
  IX86_BUILTIN_PABSB128,
  IX86_BUILTIN_PABSW128,
  IX86_BUILTIN_PABSD128,

  /* AMDFAM10 - SSE4A New Instructions.  */
  IX86_BUILTIN_MOVNTSD,
  IX86_BUILTIN_MOVNTSS,
  IX86_BUILTIN_EXTRQI,
  IX86_BUILTIN_EXTRQ,
  IX86_BUILTIN_INSERTQI,
  IX86_BUILTIN_INSERTQ,

  /* SSE4.1 */
  IX86_BUILTIN_BLENDPD,
  IX86_BUILTIN_BLENDPS,
  IX86_BUILTIN_BLENDVPD,
  IX86_BUILTIN_BLENDVPS,
  IX86_BUILTIN_PBLENDVB128,
  IX86_BUILTIN_PBLENDW128,

  IX86_BUILTIN_DPPD,
  IX86_BUILTIN_DPPS,

  IX86_BUILTIN_INSERTPS128,

  IX86_BUILTIN_MOVNTDQA,
  IX86_BUILTIN_MPSADBW128,
  IX86_BUILTIN_PACKUSDW128,
  IX86_BUILTIN_PCMPEQQ,
  IX86_BUILTIN_PHMINPOSUW128,

  IX86_BUILTIN_PMAXSB128,
  IX86_BUILTIN_PMAXSD128,
  IX86_BUILTIN_PMAXUD128,
  IX86_BUILTIN_PMAXUW128,

  IX86_BUILTIN_PMINSB128,
  IX86_BUILTIN_PMINSD128,
  IX86_BUILTIN_PMINUD128,
  IX86_BUILTIN_PMINUW128,

  IX86_BUILTIN_PMOVSXBW128,
  IX86_BUILTIN_PMOVSXBD128,
  IX86_BUILTIN_PMOVSXBQ128,
  IX86_BUILTIN_PMOVSXWD128,
  IX86_BUILTIN_PMOVSXWQ128,
  IX86_BUILTIN_PMOVSXDQ128,

  IX86_BUILTIN_PMOVZXBW128,
  IX86_BUILTIN_PMOVZXBD128,
  IX86_BUILTIN_PMOVZXBQ128,
  IX86_BUILTIN_PMOVZXWD128,
  IX86_BUILTIN_PMOVZXWQ128,
  IX86_BUILTIN_PMOVZXDQ128,

  IX86_BUILTIN_PMULDQ128,
  IX86_BUILTIN_PMULLD128,

  IX86_BUILTIN_ROUNDPD,
  IX86_BUILTIN_ROUNDPS,
  IX86_BUILTIN_ROUNDSD,
  IX86_BUILTIN_ROUNDSS,

  IX86_BUILTIN_PTESTZ,
  IX86_BUILTIN_PTESTC,
  IX86_BUILTIN_PTESTNZC,

  IX86_BUILTIN_VEC_INIT_V2SI,
  IX86_BUILTIN_VEC_INIT_V4HI,
  IX86_BUILTIN_VEC_INIT_V8QI,
  IX86_BUILTIN_VEC_EXT_V2DF,
  IX86_BUILTIN_VEC_EXT_V2DI,
  IX86_BUILTIN_VEC_EXT_V4SF,
  IX86_BUILTIN_VEC_EXT_V4SI,
  IX86_BUILTIN_VEC_EXT_V8HI,
  IX86_BUILTIN_VEC_EXT_V2SI,
  IX86_BUILTIN_VEC_EXT_V4HI,
  IX86_BUILTIN_VEC_EXT_V16QI,
  IX86_BUILTIN_VEC_SET_V2DI,
  IX86_BUILTIN_VEC_SET_V4SF,
  IX86_BUILTIN_VEC_SET_V4SI,
  IX86_BUILTIN_VEC_SET_V8HI,
  IX86_BUILTIN_VEC_SET_V4HI,
  IX86_BUILTIN_VEC_SET_V16QI,

  IX86_BUILTIN_VEC_PACK_SFIX,

  /* SSE4.2 */
  IX86_BUILTIN_CRC32QI,
  IX86_BUILTIN_CRC32HI,
  IX86_BUILTIN_CRC32SI,
  IX86_BUILTIN_CRC32DI,

  IX86_BUILTIN_PCMPESTRI128,
  IX86_BUILTIN_PCMPESTRM128,
  IX86_BUILTIN_PCMPESTRA128,
  IX86_BUILTIN_PCMPESTRC128,
  IX86_BUILTIN_PCMPESTRO128,
  IX86_BUILTIN_PCMPESTRS128,
  IX86_BUILTIN_PCMPESTRZ128,
  IX86_BUILTIN_PCMPISTRI128,
  IX86_BUILTIN_PCMPISTRM128,
  IX86_BUILTIN_PCMPISTRA128,
  IX86_BUILTIN_PCMPISTRC128,
  IX86_BUILTIN_PCMPISTRO128,
  IX86_BUILTIN_PCMPISTRS128,
  IX86_BUILTIN_PCMPISTRZ128,

  IX86_BUILTIN_PCMPGTQ,

  /* AES instructions */
  IX86_BUILTIN_AESENC128,
  IX86_BUILTIN_AESENCLAST128,
  IX86_BUILTIN_AESDEC128,
  IX86_BUILTIN_AESDECLAST128,
  IX86_BUILTIN_AESIMC128,
  IX86_BUILTIN_AESKEYGENASSIST128,

  /* PCLMUL instruction */
  IX86_BUILTIN_PCLMULQDQ128,

  /* AVX */
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,
  IX86_BUILTIN_CPYSGNPS256,
  IX86_BUILTIN_CPYSGNPD256,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs,
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* true if recorded here but the decl
                                        has not been built yet */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores
   the function decl in the ix86_builtins array.  Returns the function decl
   or NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */

static tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
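
/* A sketch of how the tables below reach this function: the builtin
   initialization code walks bdesc_args and friends and issues calls
   roughly like
     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                        V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
   deferring the decl when the ISA is not enabled in ix86_isa_flags.  */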

/* Like def_builtin, but also marks the function decl "const".  */

static tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;
  return decl;
}

/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again.  */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);
          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
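
/* Together with def_builtin this gives lazy registration: when e.g. a
   target ("avx") attribute later turns on an ISA, the option-handling
   code is expected to call (conceptually)
     ix86_add_new_builtins (ix86_isa_flags);
   so that only the decls that just became available are built, rather
   than building every builtin up front.  */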

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1
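
/* Example: there is no native "cmpgtps" pattern, so the bdesc_args
   table below encodes __builtin_ia32_cmpgtps as LT plus swapped
   operands (V4SF_FTYPE_V4SF_V4SF_SWAP): a > b is emitted as b < a.  */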

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
23119 static const struct builtin_description bdesc_comi[] =
23121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
23122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
23123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
23124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
23125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
23126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
23127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
23128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
23129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
23130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
23131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
23132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
23133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
23134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
23135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
23136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
23137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
23138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
23139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
23140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
23141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
23142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
23143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
23144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
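
/* Note the comparison codes above: the *eq/*neq variants are
   represented as UNEQ/LTGT rather than EQ/NE because comis[sd] and
   ucomis[sd] set ZF for unordered (NaN) operands too, so the flag
   test really answers "equal or unordered".  */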

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
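
/* All seven pcmp[ei]str variants expand through one insn pattern; for
   the flag-extracting forms the requested EFLAGS bit is recorded in
   the flag field as a CC mode (CCAmode, CCCmode, ...) rather than a
   function-type code.  */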

/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};

/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
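
  /* Each MMX shift above comes in two flavors that share one insn
     pattern: a *_SI_COUNT form taking the count as an integer
     (psllwi etc.) and a *_V*_COUNT form taking the count in an MMX
     register (psllw etc.).  */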
23351 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23352 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23353 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23354 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23356 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
23357 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23358 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23359 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23360 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23361 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
23362 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23363 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23364 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23365 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23366 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23367 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23368 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23369 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23370 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
23373 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
23374 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
23375 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
23376 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
23377 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
23378 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
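
/* The SSE compare patterns only implement the EQ/LT/LE/UNORDERED
   predicates and their negations directly; the GT/GE builtins below
   reuse the LT/LE entries with the operands swapped, which is what
   the *_SWAP function types request when the builtin is expanded.  */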
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

/* SSE MMX or 3Dnow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
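
/* SSE2 */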
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
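
/* The vec_perm builtins carry no insn pattern of their own
   (CODE_FOR_nothing); they are expanded specially when used.  */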
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
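
/* Shift builtins.  The *_COUNT function types mark shift counts that
   may be passed either as an immediate or in a register, while the
   *_INT_CONVERT types mark the whole-register byte shifts, whose
   V2DI operands must be converted for the V1TImode shift patterns.  */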
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
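
/* SSE2 MMX */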
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
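
/* SSE3 */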
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
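
/* SSSE3 */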
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
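
/* Like the whole-register shifts above, palignr works on a TImode
   (respectively DImode) pattern, so its operands go through the
   *_INT_CONVERT conversion as well.  */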
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
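
/* SSE4.1 */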
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
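
/* SSE4.1 rounding and ptest builtins, gated by OPTION_MASK_ISA_ROUND.  */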
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
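
/* All three ptest builtins share a single pattern; the comparison
   code recorded in the entry selects which flag the result is read
   from: EQ for ZF (ptestz), LTU for CF (ptestc) and GTU for the
   case where neither flag is set (ptestnzc).  */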
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
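
/* SSE4.2 */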
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
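
/* SSE4A */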
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
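
/* AES.  These entries carry a null name; the user-visible
   __builtin_ia32_aes* names are registered separately, where the
   AES ISA requirement is enforced.  */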
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
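
/* PCLMUL */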
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
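
/* AVX */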
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23889 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23890 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
23891 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23892 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
23893 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
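
  /* Note (editorial, not from the original sources): for the vtest/ptest
     entries above the comparison field selects the EFLAGS bit the builtin
     reads back after the test insn: EQ tests ZF (the "z" forms), LTU tests
     CF (the "c" forms), and GTU tests that both ZF and CF are clear (the
     "nzc" forms), mirroring how setz/setb/seta would consume the flags
     written by vptest/vtestps/vtestpd.  */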
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
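
/* Note (editorial): the MULTI_ARG_<n>_<elt>[2] names above encode the
   ix86 function type of each table entry: <n> is the operand count,
   <elt> the element kind, and a trailing "2" a 256-bit variant.  For
   example:

     MULTI_ARG_3_SF     -> V4SF_FTYPE_V4SF_V4SF_V4SF
       (returns V4SF and takes three V4SF operands)
     MULTI_ARG_2_DI_CMP -> V2DI_FTYPE_V2DI_V2DI_CMP
       (a two-operand comparison; the rtx code comes from the table)  */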
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, to allow the user to compile particular
   modules with different target-specific options that differ from the
   command-line options.  */
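
/* Illustrative example (editorial, not part of the original sources):
   eager registration is what makes a per-function ISA switch such as

     __attribute__((target ("avx")))
     __m256d
     add256 (__m256d a, __m256d b)
     {
       return __builtin_ia32_addpd256 (a, b);
     }

   work in a translation unit compiled without -mavx: the builtin must
   already exist when the function is parsed, and the ISA mask recorded
   in each table entry is checked later, at expansion time.  */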
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
	ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
	ftype = INT_FTYPE_V2DF_V2DF;
      else
	ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
	       IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
		     V4HI_FTYPE_HI_HI_HI_HI,
		     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
		     IX86_BUILTIN_VEC_INIT_V8QI);

  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_ext_v4hi",
		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
		     "__builtin_ia32_vec_set_v2di",
		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
		     "__builtin_ia32_vec_set_v4hi",
		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4 and XOP multi-arg instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
				      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
			      NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
			      sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
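
/* Illustrative sketch (editorial, not from the original sources): the
   builtins registered above let 64-bit code handle varargs for the
   "other" ABI explicitly, e.g.

     __attribute__((ms_abi))
     int sum (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
	 s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }

   The sysv_va_* triple plays the same role for SysV-ABI functions
   embedded in an MS-ABI program.  */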
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* The __float80 type.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
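
/* Illustrative sketch (editorial, not from the original sources): the
   TFmode builtins registered above give users the quad-precision
   helpers without any header, e.g.

     __float128 huge = __builtin_huge_valq ();
     __float128 y    = __builtin_fabsq (x);
     __float128 z    = __builtin_copysignq (y, x);

   On targets without SSE2 the last two lower to plain calls to the
   libgcc routines __fabstf2 and __copysigntf3 named in the
   registration.  */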
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
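
/* Illustrative sketch (editorial, not from the original sources): for a
   simple two-operand builtin such as

     __m128i r = __builtin_ia32_paddw128 (a, b);

   this expander receives the insn code recorded in the builtin tables
   (here the v8hi add pattern), forces A and B into operands that
   satisfy the pattern's predicates, and emits the single add insn with
   a fresh pseudo as the destination.  */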
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
			       enum ix86_builtin_func_type m_type,
			       enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
	{
	  if (!CONST_INT_P (op))
	    {
	      error ("last argument must be an immediate");
	      return gen_reg_rtx (tmode);
	    }
	}
      else
	{
	  if (VECTOR_MODE_P (mode))
	    op = safe_vector_operand (op, mode);

	  /* If we aren't optimizing, only allow one memory operand to be
	     generated.  */
	  if (memory_operand (op, mode))
	    num_memory++;

	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

	  if (optimize
	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
	      || num_memory > 1)
	    op = force_reg (mode, op);
	}

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
			       GEN_INT ((int)sub_code));
      else if (! comparison_p)
	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
	{
	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
				       args[0].op,
				       args[1].op);

	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
	}
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
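
/* Illustrative sketch (editorial, not from the original sources): a
   signed byte comparison from the XOP table,

     __m128i m = __builtin_ia32_vpcomltb (a, b);

   reaches this expander with sub_code == LT and type
   MULTI_ARG_2_QI_CMP, so the comparison_p path wraps the operands in an
   (lt ...) rtx and hands it to the xop_maskcmpv16qi3 pattern, which
   emits a single vpcomb instruction carrying the "lt" immediate.  */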
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
				    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
			 tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
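
/* Illustrative note (editorial, not from the original sources): the
   SWAP path exists because the SSE cmpps/cmppd encodings only cover the
   "less" half of the predicate space, so e.g.

     __m128 m = __builtin_ia32_cmpgtps (a, b);

   is expanded as a cmpltps with A and B exchanged; the swapped operand
   is first copied into a fresh register so the original rtxes are not
   clobbered.  */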
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
		      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  SET_DEST (pat),
					  const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
		       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
			  gen_rtx_fmt_ee (comparison, QImode,
					  gen_rtx_REG (CCmode, FLAGS_REG),
					  const0_rtx)));

  return SUBREG_REG (target);
}
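
/* Illustrative note (editorial, not from the original sources): the
   SImode pseudo plus QImode SUBREG dance above is the standard setcc
   idiom: the pseudo is zeroed first, the flag predicate is stored
   through a strict_low_part into its low byte, and the enclosing SImode
   register is returned, so e.g.

     int z = __builtin_ia32_ptestz128 (a, b);

   yields a ptest followed by a sete into an already-cleared
   register.  */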
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode0
	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
	target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
	  || GET_MODE (target) != tmode1
	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
	target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
		      gen_rtx_fmt_ee (EQ, QImode,
				      gen_rtx_REG ((enum machine_mode) d->flag,
						   FLAGS_REG),
				      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
24998 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
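/* Illustration only: these are the implicit-length counterparts, e.g.

     int     idx = _mm_cmpistri (a, b, 0x0c);    -> pcmpistri
     __m128i msk = _mm_cmpistrm (a, b, 0x0c);    -> pcmpistrm

   hence three arguments here instead of pcmpestr's five.  */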
25001 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
25002 tree exp, rtx target)
25005 tree arg0 = CALL_EXPR_ARG (exp, 0);
25006 tree arg1 = CALL_EXPR_ARG (exp, 1);
25007 tree arg2 = CALL_EXPR_ARG (exp, 2);
25008 rtx scratch0, scratch1;
25009 rtx op0 = expand_normal (arg0);
25010 rtx op1 = expand_normal (arg1);
25011 rtx op2 = expand_normal (arg2);
25012 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
25014 tmode0 = insn_data[d->icode].operand[0].mode;
25015 tmode1 = insn_data[d->icode].operand[1].mode;
25016 modev2 = insn_data[d->icode].operand[2].mode;
25017 modev3 = insn_data[d->icode].operand[3].mode;
25018 modeimm = insn_data[d->icode].operand[4].mode;
25020 if (VECTOR_MODE_P (modev2))
25021 op0 = safe_vector_operand (op0, modev2);
25022 if (VECTOR_MODE_P (modev3))
25023 op1 = safe_vector_operand (op1, modev3);
25025 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
25026 op0 = copy_to_mode_reg (modev2, op0);
25027 if ((optimize && !register_operand (op1, modev3))
25028 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
25029 op1 = copy_to_mode_reg (modev3, op1);
25031 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
25033 error ("the third argument must be an 8-bit immediate");
25037 if (d->code == IX86_BUILTIN_PCMPISTRI128)
25039 if (optimize || !target
25040 || GET_MODE (target) != tmode0
25041 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
25042 target = gen_reg_rtx (tmode0);
25044 scratch1 = gen_reg_rtx (tmode1);
25046 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
25048 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
25050 if (optimize || !target
25051 || GET_MODE (target) != tmode1
25052 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
25053 target = gen_reg_rtx (tmode1);
25055 scratch0 = gen_reg_rtx (tmode0);
25057 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
25061 gcc_assert (d->flag);
25063 scratch0 = gen_reg_rtx (tmode0);
25064 scratch1 = gen_reg_rtx (tmode1);
25066 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
25076 target = gen_reg_rtx (SImode);
25077 emit_move_insn (target, const0_rtx);
25078 target = gen_rtx_SUBREG (QImode, target, 0);
25081 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
25082 gen_rtx_fmt_ee (EQ, QImode,
25083 gen_rtx_REG ((enum machine_mode) d->flag,
25086 return SUBREG_REG (target);
25092 /* Subroutine of ix86_expand_builtin to take care of insns with
25093 a variable number of operands. */
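/* Sketch of the dispatch below (illustrative): a builtin such as
   __builtin_ia32_addps is described as V4SF_FTYPE_V4SF_V4SF, so it falls
   into the two-operand group and is expanded by ix86_expand_binop_builtin;
   operand modes, predicates and immediate checks are all driven by
   insn_data[d->icode].  */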
25096 ix86_expand_args_builtin (const struct builtin_description *d,
25097 tree exp, rtx target)
25099 rtx pat, real_target;
25100 unsigned int i, nargs;
25101 unsigned int nargs_constant = 0;
25102 int num_memory = 0;
25106 enum machine_mode mode;
25108 bool last_arg_count = false;
25109 enum insn_code icode = d->icode;
25110 const struct insn_data_d *insn_p = &insn_data[icode];
25111 enum machine_mode tmode = insn_p->operand[0].mode;
25112 enum machine_mode rmode = VOIDmode;
25114 enum rtx_code comparison = d->comparison;
25116 switch ((enum ix86_builtin_func_type) d->flag)
25118 case INT_FTYPE_V8SF_V8SF_PTEST:
25119 case INT_FTYPE_V4DI_V4DI_PTEST:
25120 case INT_FTYPE_V4DF_V4DF_PTEST:
25121 case INT_FTYPE_V4SF_V4SF_PTEST:
25122 case INT_FTYPE_V2DI_V2DI_PTEST:
25123 case INT_FTYPE_V2DF_V2DF_PTEST:
25124 return ix86_expand_sse_ptest (d, exp, target);
25125 case FLOAT128_FTYPE_FLOAT128:
25126 case FLOAT_FTYPE_FLOAT:
25127 case INT_FTYPE_INT:
25128 case UINT64_FTYPE_INT:
25129 case UINT16_FTYPE_UINT16:
25130 case INT64_FTYPE_INT64:
25131 case INT64_FTYPE_V4SF:
25132 case INT64_FTYPE_V2DF:
25133 case INT_FTYPE_V16QI:
25134 case INT_FTYPE_V8QI:
25135 case INT_FTYPE_V8SF:
25136 case INT_FTYPE_V4DF:
25137 case INT_FTYPE_V4SF:
25138 case INT_FTYPE_V2DF:
25139 case V16QI_FTYPE_V16QI:
25140 case V8SI_FTYPE_V8SF:
25141 case V8SI_FTYPE_V4SI:
25142 case V8HI_FTYPE_V8HI:
25143 case V8HI_FTYPE_V16QI:
25144 case V8QI_FTYPE_V8QI:
25145 case V8SF_FTYPE_V8SF:
25146 case V8SF_FTYPE_V8SI:
25147 case V8SF_FTYPE_V4SF:
25148 case V8SF_FTYPE_V8HI:
25149 case V4SI_FTYPE_V4SI:
25150 case V4SI_FTYPE_V16QI:
25151 case V4SI_FTYPE_V4SF:
25152 case V4SI_FTYPE_V8SI:
25153 case V4SI_FTYPE_V8HI:
25154 case V4SI_FTYPE_V4DF:
25155 case V4SI_FTYPE_V2DF:
25156 case V4HI_FTYPE_V4HI:
25157 case V4DF_FTYPE_V4DF:
25158 case V4DF_FTYPE_V4SI:
25159 case V4DF_FTYPE_V4SF:
25160 case V4DF_FTYPE_V2DF:
25161 case V4SF_FTYPE_V4SF:
25162 case V4SF_FTYPE_V4SI:
25163 case V4SF_FTYPE_V8SF:
25164 case V4SF_FTYPE_V4DF:
25165 case V4SF_FTYPE_V8HI:
25166 case V4SF_FTYPE_V2DF:
25167 case V2DI_FTYPE_V2DI:
25168 case V2DI_FTYPE_V16QI:
25169 case V2DI_FTYPE_V8HI:
25170 case V2DI_FTYPE_V4SI:
25171 case V2DF_FTYPE_V2DF:
25172 case V2DF_FTYPE_V4SI:
25173 case V2DF_FTYPE_V4DF:
25174 case V2DF_FTYPE_V4SF:
25175 case V2DF_FTYPE_V2SI:
25176 case V2SI_FTYPE_V2SI:
25177 case V2SI_FTYPE_V4SF:
25178 case V2SI_FTYPE_V2SF:
25179 case V2SI_FTYPE_V2DF:
25180 case V2SF_FTYPE_V2SF:
25181 case V2SF_FTYPE_V2SI:
25184 case V4SF_FTYPE_V4SF_VEC_MERGE:
25185 case V2DF_FTYPE_V2DF_VEC_MERGE:
25186 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
25187 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
25188 case V16QI_FTYPE_V16QI_V16QI:
25189 case V16QI_FTYPE_V8HI_V8HI:
25190 case V8QI_FTYPE_V8QI_V8QI:
25191 case V8QI_FTYPE_V4HI_V4HI:
25192 case V8HI_FTYPE_V8HI_V8HI:
25193 case V8HI_FTYPE_V16QI_V16QI:
25194 case V8HI_FTYPE_V4SI_V4SI:
25195 case V8SF_FTYPE_V8SF_V8SF:
25196 case V8SF_FTYPE_V8SF_V8SI:
25197 case V4SI_FTYPE_V4SI_V4SI:
25198 case V4SI_FTYPE_V8HI_V8HI:
25199 case V4SI_FTYPE_V4SF_V4SF:
25200 case V4SI_FTYPE_V2DF_V2DF:
25201 case V4HI_FTYPE_V4HI_V4HI:
25202 case V4HI_FTYPE_V8QI_V8QI:
25203 case V4HI_FTYPE_V2SI_V2SI:
25204 case V4DF_FTYPE_V4DF_V4DF:
25205 case V4DF_FTYPE_V4DF_V4DI:
25206 case V4SF_FTYPE_V4SF_V4SF:
25207 case V4SF_FTYPE_V4SF_V4SI:
25208 case V4SF_FTYPE_V4SF_V2SI:
25209 case V4SF_FTYPE_V4SF_V2DF:
25210 case V4SF_FTYPE_V4SF_DI:
25211 case V4SF_FTYPE_V4SF_SI:
25212 case V2DI_FTYPE_V2DI_V2DI:
25213 case V2DI_FTYPE_V16QI_V16QI:
25214 case V2DI_FTYPE_V4SI_V4SI:
25215 case V2DI_FTYPE_V2DI_V16QI:
25216 case V2DI_FTYPE_V2DF_V2DF:
25217 case V2SI_FTYPE_V2SI_V2SI:
25218 case V2SI_FTYPE_V4HI_V4HI:
25219 case V2SI_FTYPE_V2SF_V2SF:
25220 case V2DF_FTYPE_V2DF_V2DF:
25221 case V2DF_FTYPE_V2DF_V4SF:
25222 case V2DF_FTYPE_V2DF_V2DI:
25223 case V2DF_FTYPE_V2DF_DI:
25224 case V2DF_FTYPE_V2DF_SI:
25225 case V2SF_FTYPE_V2SF_V2SF:
25226 case V1DI_FTYPE_V1DI_V1DI:
25227 case V1DI_FTYPE_V8QI_V8QI:
25228 case V1DI_FTYPE_V2SI_V2SI:
25229 if (comparison == UNKNOWN)
25230 return ix86_expand_binop_builtin (icode, exp, target);
25233 case V4SF_FTYPE_V4SF_V4SF_SWAP:
25234 case V2DF_FTYPE_V2DF_V2DF_SWAP:
25235 gcc_assert (comparison != UNKNOWN);
25239 case V8HI_FTYPE_V8HI_V8HI_COUNT:
25240 case V8HI_FTYPE_V8HI_SI_COUNT:
25241 case V4SI_FTYPE_V4SI_V4SI_COUNT:
25242 case V4SI_FTYPE_V4SI_SI_COUNT:
25243 case V4HI_FTYPE_V4HI_V4HI_COUNT:
25244 case V4HI_FTYPE_V4HI_SI_COUNT:
25245 case V2DI_FTYPE_V2DI_V2DI_COUNT:
25246 case V2DI_FTYPE_V2DI_SI_COUNT:
25247 case V2SI_FTYPE_V2SI_V2SI_COUNT:
25248 case V2SI_FTYPE_V2SI_SI_COUNT:
25249 case V1DI_FTYPE_V1DI_V1DI_COUNT:
25250 case V1DI_FTYPE_V1DI_SI_COUNT:
25252 last_arg_count = true;
25254 case UINT64_FTYPE_UINT64_UINT64:
25255 case UINT_FTYPE_UINT_UINT:
25256 case UINT_FTYPE_UINT_USHORT:
25257 case UINT_FTYPE_UINT_UCHAR:
25258 case UINT16_FTYPE_UINT16_INT:
25259 case UINT8_FTYPE_UINT8_INT:
25262 case V2DI_FTYPE_V2DI_INT_CONVERT:
25265 nargs_constant = 1;
25267 case V8HI_FTYPE_V8HI_INT:
25268 case V8HI_FTYPE_V8SF_INT:
25269 case V8HI_FTYPE_V4SF_INT:
25270 case V8SF_FTYPE_V8SF_INT:
25271 case V4SI_FTYPE_V4SI_INT:
25272 case V4SI_FTYPE_V8SI_INT:
25273 case V4HI_FTYPE_V4HI_INT:
25274 case V4DF_FTYPE_V4DF_INT:
25275 case V4SF_FTYPE_V4SF_INT:
25276 case V4SF_FTYPE_V8SF_INT:
25277 case V2DI_FTYPE_V2DI_INT:
25278 case V2DF_FTYPE_V2DF_INT:
25279 case V2DF_FTYPE_V4DF_INT:
25281 nargs_constant = 1;
25283 case V16QI_FTYPE_V16QI_V16QI_V16QI:
25284 case V8SF_FTYPE_V8SF_V8SF_V8SF:
25285 case V4DF_FTYPE_V4DF_V4DF_V4DF:
25286 case V4SF_FTYPE_V4SF_V4SF_V4SF:
25287 case V2DF_FTYPE_V2DF_V2DF_V2DF:
25290 case V16QI_FTYPE_V16QI_V16QI_INT:
25291 case V8HI_FTYPE_V8HI_V8HI_INT:
25292 case V8SI_FTYPE_V8SI_V8SI_INT:
25293 case V8SI_FTYPE_V8SI_V4SI_INT:
25294 case V8SF_FTYPE_V8SF_V8SF_INT:
25295 case V8SF_FTYPE_V8SF_V4SF_INT:
25296 case V4SI_FTYPE_V4SI_V4SI_INT:
25297 case V4DF_FTYPE_V4DF_V4DF_INT:
25298 case V4DF_FTYPE_V4DF_V2DF_INT:
25299 case V4SF_FTYPE_V4SF_V4SF_INT:
25300 case V2DI_FTYPE_V2DI_V2DI_INT:
25301 case V2DF_FTYPE_V2DF_V2DF_INT:
25303 nargs_constant = 1;
25305 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
25308 nargs_constant = 1;
25310 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
25313 nargs_constant = 1;
25315 case V2DI_FTYPE_V2DI_UINT_UINT:
25317 nargs_constant = 2;
25319 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
25320 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
25321 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
25322 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
25324 nargs_constant = 1;
25326 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25328 nargs_constant = 2;
25331 gcc_unreachable ();
25334 gcc_assert (nargs <= ARRAY_SIZE (args));
25336 if (comparison != UNKNOWN)
25338 gcc_assert (nargs == 2);
25339 return ix86_expand_sse_compare (d, exp, target, swap);
25342 if (rmode == VOIDmode || rmode == tmode)
25346 || GET_MODE (target) != tmode
25347 || !insn_p->operand[0].predicate (target, tmode))
25348 target = gen_reg_rtx (tmode);
25349 real_target = target;
25353 target = gen_reg_rtx (rmode);
25354 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25357 for (i = 0; i < nargs; i++)
25359 tree arg = CALL_EXPR_ARG (exp, i);
25360 rtx op = expand_normal (arg);
25361 enum machine_mode mode = insn_p->operand[i + 1].mode;
25362 bool match = insn_p->operand[i + 1].predicate (op, mode);
25364 if (last_arg_count && (i + 1) == nargs)
25366 /* SIMD shift insns take either an 8-bit immediate or a
25367 register as the count operand, but the builtin functions take an
25368 int as the count. If the count doesn't match, we put it in a register. */
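/* Illustrative case: _mm_slli_epi16 (v, n) reaches us as
   __builtin_ia32_psllwi128 (v, n) with an int count; when n is not an
   acceptable const_int, the count is narrowed to SImode and forced into
   a register below so that the shift pattern's predicate is satisfied.  */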
25371 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25372 if (!insn_p->operand[i + 1].predicate (op, mode))
25373 op = copy_to_reg (op);
25376 else if ((nargs - i) <= nargs_constant)
25381 case CODE_FOR_sse4_1_roundpd:
25382 case CODE_FOR_sse4_1_roundps:
25383 case CODE_FOR_sse4_1_roundsd:
25384 case CODE_FOR_sse4_1_roundss:
25385 case CODE_FOR_sse4_1_blendps:
25386 case CODE_FOR_avx_blendpd256:
25387 case CODE_FOR_avx_vpermilv4df:
25388 case CODE_FOR_avx_roundpd256:
25389 case CODE_FOR_avx_roundps256:
25390 error ("the last argument must be a 4-bit immediate");
25393 case CODE_FOR_sse4_1_blendpd:
25394 case CODE_FOR_avx_vpermilv2df:
25395 case CODE_FOR_xop_vpermil2v2df3:
25396 case CODE_FOR_xop_vpermil2v4sf3:
25397 case CODE_FOR_xop_vpermil2v4df3:
25398 case CODE_FOR_xop_vpermil2v8sf3:
25399 error ("the last argument must be a 2-bit immediate");
25402 case CODE_FOR_avx_vextractf128v4df:
25403 case CODE_FOR_avx_vextractf128v8sf:
25404 case CODE_FOR_avx_vextractf128v8si:
25405 case CODE_FOR_avx_vinsertf128v4df:
25406 case CODE_FOR_avx_vinsertf128v8sf:
25407 case CODE_FOR_avx_vinsertf128v8si:
25408 error ("the last argument must be a 1-bit immediate");
25411 case CODE_FOR_avx_cmpsdv2df3:
25412 case CODE_FOR_avx_cmpssv4sf3:
25413 case CODE_FOR_avx_cmppdv2df3:
25414 case CODE_FOR_avx_cmppsv4sf3:
25415 case CODE_FOR_avx_cmppdv4df3:
25416 case CODE_FOR_avx_cmppsv8sf3:
25417 error ("the last argument must be a 5-bit immediate");
25421 switch (nargs_constant)
25424 if ((nargs - i) == nargs_constant)
25426 error ("the next to last argument must be an 8-bit immediate");
25430 error ("the last argument must be an 8-bit immediate");
25433 gcc_unreachable ();
25440 if (VECTOR_MODE_P (mode))
25441 op = safe_vector_operand (op, mode);
25443 /* If we aren't optimizing, only allow one memory operand to be generated. */
25445 if (memory_operand (op, mode))
25448 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25450 if (optimize || !match || num_memory > 1)
25451 op = copy_to_mode_reg (mode, op);
25455 op = copy_to_reg (op);
25456 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25461 args[i].mode = mode;
25467 pat = GEN_FCN (icode) (real_target, args[0].op);
25470 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25473 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25477 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25478 args[2].op, args[3].op);
25481 gcc_unreachable ();
25491 /* Subroutine of ix86_expand_builtin to take care of special insns
25492 with a variable number of operands. */
25495 ix86_expand_special_args_builtin (const struct builtin_description *d,
25496 tree exp, rtx target)
25500 unsigned int i, nargs, arg_adjust, memory;
25504 enum machine_mode mode;
25506 enum insn_code icode = d->icode;
25507 bool last_arg_constant = false;
25508 const struct insn_data_d *insn_p = &insn_data[icode];
25509 enum machine_mode tmode = insn_p->operand[0].mode;
25510 enum { load, store } klass;
25512 switch ((enum ix86_builtin_func_type) d->flag)
25514 case VOID_FTYPE_VOID:
25515 emit_insn (GEN_FCN (icode) (target));
25517 case VOID_FTYPE_UINT64:
25518 case VOID_FTYPE_UNSIGNED:
25524 case UINT64_FTYPE_VOID:
25525 case UNSIGNED_FTYPE_VOID:
25526 case UINT16_FTYPE_VOID:
25531 case UINT64_FTYPE_PUNSIGNED:
25532 case V2DI_FTYPE_PV2DI:
25533 case V32QI_FTYPE_PCCHAR:
25534 case V16QI_FTYPE_PCCHAR:
25535 case V8SF_FTYPE_PCV4SF:
25536 case V8SF_FTYPE_PCFLOAT:
25537 case V4SF_FTYPE_PCFLOAT:
25538 case V4DF_FTYPE_PCV2DF:
25539 case V4DF_FTYPE_PCDOUBLE:
25540 case V2DF_FTYPE_PCDOUBLE:
25541 case VOID_FTYPE_PVOID:
25546 case VOID_FTYPE_PV2SF_V4SF:
25547 case VOID_FTYPE_PV4DI_V4DI:
25548 case VOID_FTYPE_PV2DI_V2DI:
25549 case VOID_FTYPE_PCHAR_V32QI:
25550 case VOID_FTYPE_PCHAR_V16QI:
25551 case VOID_FTYPE_PFLOAT_V8SF:
25552 case VOID_FTYPE_PFLOAT_V4SF:
25553 case VOID_FTYPE_PDOUBLE_V4DF:
25554 case VOID_FTYPE_PDOUBLE_V2DF:
25555 case VOID_FTYPE_PULONGLONG_ULONGLONG:
25556 case VOID_FTYPE_PINT_INT:
25559 /* Reserve memory operand for target. */
25560 memory = ARRAY_SIZE (args);
25562 case V4SF_FTYPE_V4SF_PCV2SF:
25563 case V2DF_FTYPE_V2DF_PCDOUBLE:
25568 case V8SF_FTYPE_PCV8SF_V8SF:
25569 case V4DF_FTYPE_PCV4DF_V4DF:
25570 case V4SF_FTYPE_PCV4SF_V4SF:
25571 case V2DF_FTYPE_PCV2DF_V2DF:
25576 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25577 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25578 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25579 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25582 /* Reserve memory operand for target. */
25583 memory = ARRAY_SIZE (args);
25585 case VOID_FTYPE_UINT_UINT_UINT:
25586 case VOID_FTYPE_UINT64_UINT_UINT:
25587 case UCHAR_FTYPE_UINT_UINT_UINT:
25588 case UCHAR_FTYPE_UINT64_UINT_UINT:
25591 memory = ARRAY_SIZE (args);
25592 last_arg_constant = true;
25595 gcc_unreachable ();
25598 gcc_assert (nargs <= ARRAY_SIZE (args));
25600 if (klass == store)
25602 arg = CALL_EXPR_ARG (exp, 0);
25603 op = expand_normal (arg);
25604 gcc_assert (target == 0);
25606 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25608 target = force_reg (tmode, op);
25616 || GET_MODE (target) != tmode
25617 || !insn_p->operand[0].predicate (target, tmode))
25618 target = gen_reg_rtx (tmode);
25621 for (i = 0; i < nargs; i++)
25623 enum machine_mode mode = insn_p->operand[i + 1].mode;
25626 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25627 op = expand_normal (arg);
25628 match = insn_p->operand[i + 1].predicate (op, mode);
25630 if (last_arg_constant && (i + 1) == nargs)
25634 if (icode == CODE_FOR_lwp_lwpvalsi3
25635 || icode == CODE_FOR_lwp_lwpinssi3
25636 || icode == CODE_FOR_lwp_lwpvaldi3
25637 || icode == CODE_FOR_lwp_lwpinsdi3)
25638 error ("the last argument must be a 32-bit immediate");
25640 error ("the last argument must be an 8-bit immediate");
25648 /* This must be the memory operand. */
25649 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25650 gcc_assert (GET_MODE (op) == mode
25651 || GET_MODE (op) == VOIDmode);
25655 /* This must be a register. */
25656 if (VECTOR_MODE_P (mode))
25657 op = safe_vector_operand (op, mode);
25659 gcc_assert (GET_MODE (op) == mode
25660 || GET_MODE (op) == VOIDmode);
25661 op = copy_to_mode_reg (mode, op);
25666 args[i].mode = mode;
25672 pat = GEN_FCN (icode) (target);
25675 pat = GEN_FCN (icode) (target, args[0].op);
25678 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25681 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25684 gcc_unreachable ();
25690 return klass == store ? 0 : target;
25693 /* Return the integer constant in ARG. Constrain it to be in the range
25694 of the subparts of VEC_TYPE; issue an error if not. */
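/* E.g. for a V4SF vector type the valid selectors are 0..3, so a
   (hypothetical) call __builtin_ia32_vec_ext_v4sf (x, 9) is diagnosed
   here.  */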
25697 get_element_number (tree vec_type, tree arg)
25699 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25701 if (!host_integerp (arg, 1)
25702 || (elt = tree_low_cst (arg, 1), elt > max))
25704 error ("selector must be an integer constant in the range 0..%wi", max);
25711 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25712 ix86_expand_vector_init. We DO have language-level syntax for this, in
25713 the form of (type){ init-list }. Except that since we can't place emms
25714 instructions from inside the compiler, we can't allow the use of MMX
25715 registers unless the user explicitly asks for it. So we do *not* define
25716 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25717 we have builtins invoked by mmintrin.h that give us license to emit
25718 these sorts of instructions. */
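/* Illustrative mapping: mmintrin.h defines _mm_set_pi32 (i1, i0) in
   terms of __builtin_ia32_vec_init_v2si rather than a (__m64){...}
   initializer, and that builtin is expanded here.  */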
25721 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25723 enum machine_mode tmode = TYPE_MODE (type);
25724 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25725 int i, n_elt = GET_MODE_NUNITS (tmode);
25726 rtvec v = rtvec_alloc (n_elt);
25728 gcc_assert (VECTOR_MODE_P (tmode));
25729 gcc_assert (call_expr_nargs (exp) == n_elt);
25731 for (i = 0; i < n_elt; ++i)
25733 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25734 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25737 if (!target || !register_operand (target, tmode))
25738 target = gen_reg_rtx (tmode);
25740 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25744 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25745 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25746 had a language-level syntax for referencing vector elements. */
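/* E.g. _mm_extract_epi16 (v, 3) reaches us as
   __builtin_ia32_vec_ext_v8hi (v, 3) (illustrative mapping).  */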
25749 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25751 enum machine_mode tmode, mode0;
25756 arg0 = CALL_EXPR_ARG (exp, 0);
25757 arg1 = CALL_EXPR_ARG (exp, 1);
25759 op0 = expand_normal (arg0);
25760 elt = get_element_number (TREE_TYPE (arg0), arg1);
25762 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25763 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25764 gcc_assert (VECTOR_MODE_P (mode0));
25766 op0 = force_reg (mode0, op0);
25768 if (optimize || !target || !register_operand (target, tmode))
25769 target = gen_reg_rtx (tmode);
25771 ix86_expand_vector_extract (true, target, op0, elt);
25776 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25777 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25778 a language-level syntax for referencing vector elements. */
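/* E.g. _mm_insert_epi16 (v, x, 3) reaches us as
   __builtin_ia32_vec_set_v8hi (v, x, 3) (illustrative mapping).  */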
25781 ix86_expand_vec_set_builtin (tree exp)
25783 enum machine_mode tmode, mode1;
25784 tree arg0, arg1, arg2;
25786 rtx op0, op1, target;
25788 arg0 = CALL_EXPR_ARG (exp, 0);
25789 arg1 = CALL_EXPR_ARG (exp, 1);
25790 arg2 = CALL_EXPR_ARG (exp, 2);
25792 tmode = TYPE_MODE (TREE_TYPE (arg0));
25793 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25794 gcc_assert (VECTOR_MODE_P (tmode));
25796 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25797 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25798 elt = get_element_number (TREE_TYPE (arg0), arg2);
25800 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25801 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25803 op0 = force_reg (tmode, op0);
25804 op1 = force_reg (mode1, op1);
25806 /* OP0 is the source of these builtin functions and shouldn't be
25807 modified. Create a copy, use it, and return it as the target. */
25808 target = gen_reg_rtx (tmode);
25809 emit_move_insn (target, op0);
25810 ix86_expand_vector_set (true, target, op1, elt);
25815 /* Expand an expression EXP that calls a built-in function,
25816 with result going to TARGET if that's convenient
25817 (and in mode MODE if that's convenient).
25818 SUBTARGET may be used as the target for computing one of EXP's operands.
25819 IGNORE is nonzero if the value is to be ignored. */
25822 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25823 enum machine_mode mode ATTRIBUTE_UNUSED,
25824 int ignore ATTRIBUTE_UNUSED)
25826 const struct builtin_description *d;
25828 enum insn_code icode;
25829 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25830 tree arg0, arg1, arg2;
25831 rtx op0, op1, op2, pat;
25832 enum machine_mode mode0, mode1, mode2;
25833 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25835 /* Determine whether the builtin function is available under the current ISA.
25836 Originally the builtin was not created if it wasn't applicable to the
25837 current ISA based on the command line switches. With function specific
25838 options, we need to check in the context of the function making the call
25839 whether it is supported. */
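/* Illustrative effect: calling __builtin_ia32_sqrtps256 from a function
   marked __attribute__((target("avx"))) is accepted even when the unit
   was not compiled with -mavx; without such an attribute the error below
   names the missing option.  */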
25840 if (ix86_builtins_isa[fcode].isa
25841 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25843 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25844 NULL, NULL, false);
25847 error ("%qE needs unknown isa option", fndecl);
25850 gcc_assert (opts != NULL);
25851 error ("%qE needs isa option %s", fndecl, opts);
25859 case IX86_BUILTIN_MASKMOVQ:
25860 case IX86_BUILTIN_MASKMOVDQU:
25861 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25862 ? CODE_FOR_mmx_maskmovq
25863 : CODE_FOR_sse2_maskmovdqu);
25864 /* Note the arg order is different from the operand order. */
25865 arg1 = CALL_EXPR_ARG (exp, 0);
25866 arg2 = CALL_EXPR_ARG (exp, 1);
25867 arg0 = CALL_EXPR_ARG (exp, 2);
25868 op0 = expand_normal (arg0);
25869 op1 = expand_normal (arg1);
25870 op2 = expand_normal (arg2);
25871 mode0 = insn_data[icode].operand[0].mode;
25872 mode1 = insn_data[icode].operand[1].mode;
25873 mode2 = insn_data[icode].operand[2].mode;
25875 op0 = force_reg (Pmode, op0);
25876 op0 = gen_rtx_MEM (mode1, op0);
25878 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25879 op0 = copy_to_mode_reg (mode0, op0);
25880 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25881 op1 = copy_to_mode_reg (mode1, op1);
25882 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25883 op2 = copy_to_mode_reg (mode2, op2);
25884 pat = GEN_FCN (icode) (op0, op1, op2);
25890 case IX86_BUILTIN_LDMXCSR:
25891 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25892 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25893 emit_move_insn (target, op0);
25894 emit_insn (gen_sse_ldmxcsr (target));
25897 case IX86_BUILTIN_STMXCSR:
25898 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25899 emit_insn (gen_sse_stmxcsr (target));
25900 return copy_to_mode_reg (SImode, target);
25902 case IX86_BUILTIN_CLFLUSH:
25903 arg0 = CALL_EXPR_ARG (exp, 0);
25904 op0 = expand_normal (arg0);
25905 icode = CODE_FOR_sse2_clflush;
25906 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25907 op0 = copy_to_mode_reg (Pmode, op0);
25909 emit_insn (gen_sse2_clflush (op0));
25912 case IX86_BUILTIN_MONITOR:
25913 arg0 = CALL_EXPR_ARG (exp, 0);
25914 arg1 = CALL_EXPR_ARG (exp, 1);
25915 arg2 = CALL_EXPR_ARG (exp, 2);
25916 op0 = expand_normal (arg0);
25917 op1 = expand_normal (arg1);
25918 op2 = expand_normal (arg2);
25920 op0 = copy_to_mode_reg (Pmode, op0);
25922 op1 = copy_to_mode_reg (SImode, op1);
25924 op2 = copy_to_mode_reg (SImode, op2);
25925 emit_insn (ix86_gen_monitor (op0, op1, op2));
25928 case IX86_BUILTIN_MWAIT:
25929 arg0 = CALL_EXPR_ARG (exp, 0);
25930 arg1 = CALL_EXPR_ARG (exp, 1);
25931 op0 = expand_normal (arg0);
25932 op1 = expand_normal (arg1);
25934 op0 = copy_to_mode_reg (SImode, op0);
25936 op1 = copy_to_mode_reg (SImode, op1);
25937 emit_insn (gen_sse3_mwait (op0, op1));
25940 case IX86_BUILTIN_VEC_INIT_V2SI:
25941 case IX86_BUILTIN_VEC_INIT_V4HI:
25942 case IX86_BUILTIN_VEC_INIT_V8QI:
25943 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25945 case IX86_BUILTIN_VEC_EXT_V2DF:
25946 case IX86_BUILTIN_VEC_EXT_V2DI:
25947 case IX86_BUILTIN_VEC_EXT_V4SF:
25948 case IX86_BUILTIN_VEC_EXT_V4SI:
25949 case IX86_BUILTIN_VEC_EXT_V8HI:
25950 case IX86_BUILTIN_VEC_EXT_V2SI:
25951 case IX86_BUILTIN_VEC_EXT_V4HI:
25952 case IX86_BUILTIN_VEC_EXT_V16QI:
25953 return ix86_expand_vec_ext_builtin (exp, target);
25955 case IX86_BUILTIN_VEC_SET_V2DI:
25956 case IX86_BUILTIN_VEC_SET_V4SF:
25957 case IX86_BUILTIN_VEC_SET_V4SI:
25958 case IX86_BUILTIN_VEC_SET_V8HI:
25959 case IX86_BUILTIN_VEC_SET_V4HI:
25960 case IX86_BUILTIN_VEC_SET_V16QI:
25961 return ix86_expand_vec_set_builtin (exp);
25963 case IX86_BUILTIN_VEC_PERM_V2DF:
25964 case IX86_BUILTIN_VEC_PERM_V4SF:
25965 case IX86_BUILTIN_VEC_PERM_V2DI:
25966 case IX86_BUILTIN_VEC_PERM_V4SI:
25967 case IX86_BUILTIN_VEC_PERM_V8HI:
25968 case IX86_BUILTIN_VEC_PERM_V16QI:
25969 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25970 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25971 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25972 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25973 case IX86_BUILTIN_VEC_PERM_V4DF:
25974 case IX86_BUILTIN_VEC_PERM_V8SF:
25975 return ix86_expand_vec_perm_builtin (exp);
25977 case IX86_BUILTIN_INFQ:
25978 case IX86_BUILTIN_HUGE_VALQ:
25980 REAL_VALUE_TYPE inf;
25984 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25986 tmp = validize_mem (force_const_mem (mode, tmp));
25989 target = gen_reg_rtx (mode);
25991 emit_move_insn (target, tmp);
25995 case IX86_BUILTIN_LLWPCB:
25996 arg0 = CALL_EXPR_ARG (exp, 0);
25997 op0 = expand_normal (arg0);
25998 icode = CODE_FOR_lwp_llwpcb;
25999 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
26000 op0 = copy_to_mode_reg (Pmode, op0);
26001 emit_insn (gen_lwp_llwpcb (op0));
26004 case IX86_BUILTIN_SLWPCB:
26005 icode = CODE_FOR_lwp_slwpcb;
26007 || !insn_data[icode].operand[0].predicate (target, Pmode))
26008 target = gen_reg_rtx (Pmode);
26009 emit_insn (gen_lwp_slwpcb (target));
26016 for (i = 0, d = bdesc_special_args;
26017 i < ARRAY_SIZE (bdesc_special_args);
26019 if (d->code == fcode)
26020 return ix86_expand_special_args_builtin (d, exp, target);
26022 for (i = 0, d = bdesc_args;
26023 i < ARRAY_SIZE (bdesc_args);
26025 if (d->code == fcode)
26028 case IX86_BUILTIN_FABSQ:
26029 case IX86_BUILTIN_COPYSIGNQ:
26031 /* Emit a normal call if SSE2 isn't available. */
26032 return expand_call (exp, target, ignore);
26034 return ix86_expand_args_builtin (d, exp, target);
26037 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
26038 if (d->code == fcode)
26039 return ix86_expand_sse_comi (d, exp, target);
26041 for (i = 0, d = bdesc_pcmpestr;
26042 i < ARRAY_SIZE (bdesc_pcmpestr);
26044 if (d->code == fcode)
26045 return ix86_expand_sse_pcmpestr (d, exp, target);
26047 for (i = 0, d = bdesc_pcmpistr;
26048 i < ARRAY_SIZE (bdesc_pcmpistr);
26050 if (d->code == fcode)
26051 return ix86_expand_sse_pcmpistr (d, exp, target);
26053 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
26054 if (d->code == fcode)
26055 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
26056 (enum ix86_builtin_func_type)
26057 d->flag, d->comparison);
26059 gcc_unreachable ();
26062 /* Returns a function decl for a vectorized version of the builtin function
26063 with builtin function code FN and the result vector type TYPE, or NULL_TREE
26064 if it is not available. */
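/* Illustrative: when the vectorizer asks about sqrt over double with
   V2DF in and out, this returns the decl for IX86_BUILTIN_SQRTPD, so
   the loop is vectorized with sqrtpd instead of scalar libcalls.  */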
26067 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
26070 enum machine_mode in_mode, out_mode;
26072 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
26074 if (TREE_CODE (type_out) != VECTOR_TYPE
26075 || TREE_CODE (type_in) != VECTOR_TYPE
26076 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
26079 out_mode = TYPE_MODE (TREE_TYPE (type_out));
26080 out_n = TYPE_VECTOR_SUBPARTS (type_out);
26081 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26082 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26086 case BUILT_IN_SQRT:
26087 if (out_mode == DFmode && in_mode == DFmode)
26089 if (out_n == 2 && in_n == 2)
26090 return ix86_builtins[IX86_BUILTIN_SQRTPD];
26091 else if (out_n == 4 && in_n == 4)
26092 return ix86_builtins[IX86_BUILTIN_SQRTPD256];
26096 case BUILT_IN_SQRTF:
26097 if (out_mode == SFmode && in_mode == SFmode)
26099 if (out_n == 4 && in_n == 4)
26100 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
26101 else if (out_n == 8 && in_n == 8)
26102 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
26106 case BUILT_IN_LRINT:
26107 if (out_mode == SImode && out_n == 4
26108 && in_mode == DFmode && in_n == 2)
26109 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
26112 case BUILT_IN_LRINTF:
26113 if (out_mode == SImode && in_mode == SFmode)
26115 if (out_n == 4 && in_n == 4)
26116 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
26117 else if (out_n == 8 && in_n == 8)
26118 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
26122 case BUILT_IN_COPYSIGN:
26123 if (out_mode == DFmode && in_mode == DFmode)
26125 if (out_n == 2 && in_n == 2)
26126 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
26127 else if (out_n == 4 && in_n == 4)
26128 return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
26132 case BUILT_IN_COPYSIGNF:
26133 if (out_mode == SFmode && in_mode == SFmode)
26135 if (out_n == 4 && in_n == 4)
26136 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
26137 else if (out_n == 8 && in_n == 8)
26138 return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
26146 /* Dispatch to a handler for a vectorization library. */
26147 if (ix86_veclib_handler)
26148 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
26154 /* Handler for an SVML-style interface to
26155 a library with vectorized intrinsics. */
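/* Name-mangling sketch (see below): BUILT_IN_SINF over V4SF becomes
   "vmlsSin4" and BUILT_IN_SIN over V2DF becomes "vmldSin2"; log is
   special-cased as "Ln".  */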
26158 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
26161 tree fntype, new_fndecl, args;
26164 enum machine_mode el_mode, in_mode;
26167 /* SVML is suitable for unsafe math only. */
26168 if (!flag_unsafe_math_optimizations)
26171 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26172 n = TYPE_VECTOR_SUBPARTS (type_out);
26173 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26174 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26175 if (el_mode != in_mode
26183 case BUILT_IN_LOG10:
26185 case BUILT_IN_TANH:
26187 case BUILT_IN_ATAN:
26188 case BUILT_IN_ATAN2:
26189 case BUILT_IN_ATANH:
26190 case BUILT_IN_CBRT:
26191 case BUILT_IN_SINH:
26193 case BUILT_IN_ASINH:
26194 case BUILT_IN_ASIN:
26195 case BUILT_IN_COSH:
26197 case BUILT_IN_ACOSH:
26198 case BUILT_IN_ACOS:
26199 if (el_mode != DFmode || n != 2)
26203 case BUILT_IN_EXPF:
26204 case BUILT_IN_LOGF:
26205 case BUILT_IN_LOG10F:
26206 case BUILT_IN_POWF:
26207 case BUILT_IN_TANHF:
26208 case BUILT_IN_TANF:
26209 case BUILT_IN_ATANF:
26210 case BUILT_IN_ATAN2F:
26211 case BUILT_IN_ATANHF:
26212 case BUILT_IN_CBRTF:
26213 case BUILT_IN_SINHF:
26214 case BUILT_IN_SINF:
26215 case BUILT_IN_ASINHF:
26216 case BUILT_IN_ASINF:
26217 case BUILT_IN_COSHF:
26218 case BUILT_IN_COSF:
26219 case BUILT_IN_ACOSHF:
26220 case BUILT_IN_ACOSF:
26221 if (el_mode != SFmode || n != 4)
26229 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26231 if (fn == BUILT_IN_LOGF)
26232 strcpy (name, "vmlsLn4");
26233 else if (fn == BUILT_IN_LOG)
26234 strcpy (name, "vmldLn2");
26237 sprintf (name, "vmls%s", bname+10);
26238 name[strlen (name)-1] = '4';
26241 sprintf (name, "vmld%s2", bname+10);
26243 /* Convert to uppercase. */
26247 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26248 args = TREE_CHAIN (args))
26252 fntype = build_function_type_list (type_out, type_in, NULL);
26254 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26256 /* Build a function declaration for the vectorized function. */
26257 new_fndecl = build_decl (BUILTINS_LOCATION,
26258 FUNCTION_DECL, get_identifier (name), fntype);
26259 TREE_PUBLIC (new_fndecl) = 1;
26260 DECL_EXTERNAL (new_fndecl) = 1;
26261 DECL_IS_NOVOPS (new_fndecl) = 1;
26262 TREE_READONLY (new_fndecl) = 1;
26267 /* Handler for an ACML-style interface to
26268 a library with vectorized intrinsics. */
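/* Resulting names (illustrative): sin over V2DF maps to ACML's
   "__vrd2_sin", sinf over V4SF to "__vrs4_sinf".  */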
26271 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
26273 char name[20] = "__vr.._";
26274 tree fntype, new_fndecl, args;
26277 enum machine_mode el_mode, in_mode;
26280 /* The ACML is 64-bit only and suitable for unsafe math only, as
26281 it does not correctly support parts of IEEE with the required
26282 precision, such as denormals. */
26284 || !flag_unsafe_math_optimizations)
26287 el_mode = TYPE_MODE (TREE_TYPE (type_out));
26288 n = TYPE_VECTOR_SUBPARTS (type_out);
26289 in_mode = TYPE_MODE (TREE_TYPE (type_in));
26290 in_n = TYPE_VECTOR_SUBPARTS (type_in);
26291 if (el_mode != in_mode
26301 case BUILT_IN_LOG2:
26302 case BUILT_IN_LOG10:
26305 if (el_mode != DFmode
26310 case BUILT_IN_SINF:
26311 case BUILT_IN_COSF:
26312 case BUILT_IN_EXPF:
26313 case BUILT_IN_POWF:
26314 case BUILT_IN_LOGF:
26315 case BUILT_IN_LOG2F:
26316 case BUILT_IN_LOG10F:
26319 if (el_mode != SFmode
26328 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
26329 sprintf (name + 7, "%s", bname+10);
26332 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
26333 args = TREE_CHAIN (args))
26337 fntype = build_function_type_list (type_out, type_in, NULL);
26339 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
26341 /* Build a function declaration for the vectorized function. */
26342 new_fndecl = build_decl (BUILTINS_LOCATION,
26343 FUNCTION_DECL, get_identifier (name), fntype);
26344 TREE_PUBLIC (new_fndecl) = 1;
26345 DECL_EXTERNAL (new_fndecl) = 1;
26346 DECL_IS_NOVOPS (new_fndecl) = 1;
26347 TREE_READONLY (new_fndecl) = 1;
26353 /* Returns a decl of a function that implements conversion of an integer vector
26354 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
26355 are the types involved when converting according to CODE.
26356 Return NULL_TREE if it is not available. */
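/* Illustrative: a FLOAT_EXPR from V4SI to V4SF yields the decl for
   IX86_BUILTIN_CVTDQ2PS (CVTUDQ2PS when the source is unsigned), i.e. a
   single cvtdq2ps instruction.  */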
26359 ix86_vectorize_builtin_conversion (unsigned int code,
26360 tree dest_type, tree src_type)
26368 switch (TYPE_MODE (src_type))
26371 switch (TYPE_MODE (dest_type))
26374 return (TYPE_UNSIGNED (src_type)
26375 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
26376 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
26378 return (TYPE_UNSIGNED (src_type)
26380 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
26386 switch (TYPE_MODE (dest_type))
26389 return (TYPE_UNSIGNED (src_type)
26391 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS256]);
26400 case FIX_TRUNC_EXPR:
26401 switch (TYPE_MODE (dest_type))
26404 switch (TYPE_MODE (src_type))
26407 return (TYPE_UNSIGNED (dest_type)
26409 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
26411 return (TYPE_UNSIGNED (dest_type)
26413 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
26420 switch (TYPE_MODE (src_type))
26423 return (TYPE_UNSIGNED (dest_type)
26425 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
26442 /* Returns a code for a target-specific builtin that implements
26443 the reciprocal of the function, or NULL_TREE if not available. */
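/* E.g. under -ffast-math a vectorized 1.0f / sqrtf (x) computation can
   swap IX86_BUILTIN_SQRTPS_NR for IX86_BUILTIN_RSQRTPS_NR, i.e. rsqrtps
   followed by a Newton-Raphson refinement step (illustrative).  */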
26446 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
26447 bool sqrt ATTRIBUTE_UNUSED)
26449 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
26450 && flag_finite_math_only && !flag_trapping_math
26451 && flag_unsafe_math_optimizations))
26455 /* Machine dependent builtins. */
26458 /* Vectorized version of sqrt to rsqrt conversion. */
26459 case IX86_BUILTIN_SQRTPS_NR:
26460 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26462 case IX86_BUILTIN_SQRTPS_NR256:
26463 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
26469 /* Normal builtins. */
26472 /* Sqrt to rsqrt conversion. */
26473 case BUILT_IN_SQRTF:
26474 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26481 /* Helper for avx_vpermilps256_operand et al. This is also used by
26482 the expansion functions to turn the parallel back into a mask.
26483 The return value is 0 for no match and the imm8+1 for a match. */
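/* Worked example (V4SF): the parallel [1 0 3 2] encodes as
     mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1,
   so 0xb1 + 1 is returned and the caller subtracts one to recover the
   vpermilps imm8.  */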
26486 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
26488 unsigned i, nelt = GET_MODE_NUNITS (mode);
26490 unsigned char ipar[8];
26492 if (XVECLEN (par, 0) != (int) nelt)
26495 /* Validate that all of the elements are constants, and not totally
26496 out of range. Copy the data into an integral array to make the
26497 subsequent checks easier. */
26498 for (i = 0; i < nelt; ++i)
26500 rtx er = XVECEXP (par, 0, i);
26501 unsigned HOST_WIDE_INT ei;
26503 if (!CONST_INT_P (er))
26514 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
26516 for (i = 0; i < 2; ++i)
26520 mask |= ipar[i] << i;
26522 for (i = 2; i < 4; ++i)
26526 mask |= (ipar[i] - 2) << i;
26531 /* In the 256-bit SFmode case, we have full freedom of movement
26532 within the low 128-bit lane, but the high 128-bit lane must
26533 mirror the exact same pattern. */
26534 for (i = 0; i < 4; ++i)
26535 if (ipar[i] + 4 != ipar[i + 4])
26542 /* In the 128-bit case, we have full freedom in the placement of
26543 the elements from the source operand. */
26544 for (i = 0; i < nelt; ++i)
26545 mask |= ipar[i] << (i * (nelt / 2));
26549 gcc_unreachable ();
26552 /* Make sure success has a non-zero value by adding one. */
26556 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
26557 the expansion functions to turn the parallel back into a mask.
26558 The return value is 0 for no match and the imm8+1 for a match. */
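/* Worked example (V4DF, illustrative): a parallel selecting elements
   {2,3,6,7}, i.e. the high lane of each source, reduces to lane
   selectors 2/2 = 1 and 6/2 = 3, so mask = 1<<0 | 3<<4 = 0x31, the
   vperm2f128 imm8, and 0x31 + 1 is returned.  */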
26561 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
26563 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
26565 unsigned char ipar[8];
26567 if (XVECLEN (par, 0) != (int) nelt)
26570 /* Validate that all of the elements are constants, and not totally
26571 out of range. Copy the data into an integral array to make the
26572 subsequent checks easier. */
26573 for (i = 0; i < nelt; ++i)
26575 rtx er = XVECEXP (par, 0, i);
26576 unsigned HOST_WIDE_INT ei;
26578 if (!CONST_INT_P (er))
26581 if (ei >= 2 * nelt)
26586 /* Validate that each half of the permute selects consecutive elements. */
26587 for (i = 0; i < nelt2 - 1; ++i)
26588 if (ipar[i] + 1 != ipar[i + 1])
26590 for (i = nelt2; i < nelt - 1; ++i)
26591 if (ipar[i] + 1 != ipar[i + 1])
26594 /* Reconstruct the mask. */
26595 for (i = 0; i < 2; ++i)
26597 unsigned e = ipar[i * nelt2];
26601 mask |= e << (i * 4);
26604 /* Make sure success has a non-zero value by adding one. */
26609 /* Store OPERAND to memory after reload has completed. This means
26610 that we can't easily use assign_stack_local. */
26612 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26616 gcc_assert (reload_completed);
26617 if (ix86_using_red_zone ())
26619 result = gen_rtx_MEM (mode,
26620 gen_rtx_PLUS (Pmode,
26622 GEN_INT (-RED_ZONE_SIZE)));
26623 emit_move_insn (result, operand);
26625 else if (TARGET_64BIT)
26631 operand = gen_lowpart (DImode, operand);
26635 gen_rtx_SET (VOIDmode,
26636 gen_rtx_MEM (DImode,
26637 gen_rtx_PRE_DEC (DImode,
26638 stack_pointer_rtx)),
26642 gcc_unreachable ();
26644 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26653 split_double_mode (mode, &operand, 1, operands, operands + 1);
26655 gen_rtx_SET (VOIDmode,
26656 gen_rtx_MEM (SImode,
26657 gen_rtx_PRE_DEC (Pmode,
26658 stack_pointer_rtx)),
26661 gen_rtx_SET (VOIDmode,
26662 gen_rtx_MEM (SImode,
26663 gen_rtx_PRE_DEC (Pmode,
26664 stack_pointer_rtx)),
26669 /* Store HImode values as SImode. */
26670 operand = gen_lowpart (SImode, operand);
26674 gen_rtx_SET (VOIDmode,
26675 gen_rtx_MEM (GET_MODE (operand),
26676 gen_rtx_PRE_DEC (SImode,
26677 stack_pointer_rtx)),
26681 gcc_unreachable ();
26683 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26688 /* Free the operand from memory. */
26690 ix86_free_from_memory (enum machine_mode mode)
26692 if (!ix86_using_red_zone ())
26696 if (mode == DImode || TARGET_64BIT)
26700 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26701 to a pop or add instruction if registers are available. */
26702 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26703 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26708 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26710 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same. */
26711 static const reg_class_t *
26712 i386_ira_cover_classes (void)
26714 static const reg_class_t sse_fpmath_classes[] = {
26715 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26717 static const reg_class_t no_sse_fpmath_classes[] = {
26718 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26721 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26724 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
26726 Put float CONST_DOUBLE in the constant pool instead of fp regs.
26727 QImode must go into class Q_REGS.
26728 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26729 movdf to do mem-to-mem moves through integer regs. */
26732 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
26734 enum machine_mode mode = GET_MODE (x);
26736 /* We're only allowed to return a subclass of CLASS. Many of the
26737 following checks fail for NO_REGS, so eliminate that early. */
26738 if (regclass == NO_REGS)
26741 /* All classes can load zeros. */
26742 if (x == CONST0_RTX (mode))
26745 /* Force constants into memory if we are loading a (nonzero) constant into
26746 an MMX or SSE register. This is because there are no MMX/SSE instructions
26747 to load from a constant. */
26749 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26752 /* Prefer SSE regs only, if we can use them for math. */
26753 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26754 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26756 /* Floating-point constants need more complex checks. */
26757 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26759 /* General regs can load everything. */
26760 if (reg_class_subset_p (regclass, GENERAL_REGS))
26763 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26764 zero above. We only want to wind up preferring 80387 registers if
26765 we plan on doing computation with them. */
26767 && standard_80387_constant_p (x))
26769 /* Limit class to non-sse. */
26770 if (regclass == FLOAT_SSE_REGS)
26772 if (regclass == FP_TOP_SSE_REGS)
26774 if (regclass == FP_SECOND_SSE_REGS)
26775 return FP_SECOND_REG;
26776 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26783 /* Generally when we see PLUS here, it's the function invariant
26784 (plus soft-fp const_int), which can only be computed into general regs. */
26786 if (GET_CODE (x) == PLUS)
26787 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26789 /* QImode constants are easy to load, but non-constant QImode data
26790 must go into Q_REGS. */
26791 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26793 if (reg_class_subset_p (regclass, Q_REGS))
26795 if (reg_class_subset_p (Q_REGS, regclass))
26803 /* Discourage putting floating-point values in SSE registers unless
26804 SSE math is being used, and likewise for the 387 registers. */
26806 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26808 enum machine_mode mode = GET_MODE (x);
26810 /* Restrict the output reload class to the register bank that we are doing
26811 math on. If we would like not to return a subset of CLASS, reject this
26812 alternative: if reload cannot do this, it will still use its choice. */
26813 mode = GET_MODE (x);
26814 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26815 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26817 if (X87_FLOAT_MODE_P (mode))
26819 if (regclass == FP_TOP_SSE_REGS)
26821 else if (regclass == FP_SECOND_SSE_REGS)
26822 return FP_SECOND_REG;
26824 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26831 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26832 enum machine_mode mode,
26833 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26835 /* QImode spills from non-QI registers require an
26836 intermediate register on 32-bit targets. */
26837 if (!in_p && mode == QImode && !TARGET_64BIT
26838 && (rclass == GENERAL_REGS
26839 || rclass == LEGACY_REGS
26840 || rclass == INDEX_REGS))
26849 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26850 regno = true_regnum (x);
26852 /* Return Q_REGS if the operand is in memory. */
26860 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26863 ix86_class_likely_spilled_p (reg_class_t rclass)
26874 case SSE_FIRST_REG:
26876 case FP_SECOND_REG:
26886 /* If we are copying between general and FP registers, we need a memory
26887 location. The same is true for SSE and MMX registers.
26889 To optimize register_move_cost performance, allow an inline variant.
26891 The macro can't work reliably when one of the CLASSES is a class containing
26892 registers from multiple units (SSE, MMX, integer). We avoid this by never
26893 combining those units in a single alternative in the machine description.
26894 Ensure that this constraint holds to avoid unexpected surprises.
26896 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26897 enforce these sanity checks. */
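/* Concrete case (illustrative): an SFmode copy between FLOAT_REGS (x87)
   and SSE_REGS has no direct move instruction, so it must travel through
   a stack slot, e.g. fstps mem; movss mem, %xmm0.  */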
26900 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26901 enum machine_mode mode, int strict)
26903 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26904 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26905 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26906 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26907 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26908 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26910 gcc_assert (!strict);
26914 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26917 /* ??? This is a lie. We do have moves between mmx/general, and for
26918 mmx/sse2. But by saying we need secondary memory we discourage the
26919 register allocator from using the mmx registers unless needed. */
26920 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26923 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26925 /* SSE1 doesn't have any direct moves from other classes. */
26929 /* If the target says that inter-unit moves are more expensive
26930 than moving through memory, then don't generate them. */
26931 if (!TARGET_INTER_UNIT_MOVES)
26934 /* Between SSE and general, we have moves no larger than word size. */
26935 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26943 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26944 enum machine_mode mode, int strict)
26946 return inline_secondary_memory_needed (class1, class2, mode, strict);
26949 /* Return true if the registers in CLASS cannot represent the change from
26950 modes FROM to TO. */
26953 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26954 enum reg_class regclass)
26959 /* x87 registers can't do subreg at all, as all values are reformatted
26960 to extended precision. */
26961 if (MAYBE_FLOAT_CLASS_P (regclass))
26964 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26966 /* Vector registers do not support QI or HImode loads. If we don't
26967 disallow a change to these modes, reload will assume it's ok to
26968 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26969 the vec_dupv4hi pattern. */
26970 if (GET_MODE_SIZE (from) < 4)
26973 /* Vector registers do not support subreg with nonzero offsets, which
26974 are otherwise valid for integer registers. Since we can't see
26975 whether we have a nonzero offset from here, prohibit all
26976 nonparadoxical subregs changing size. */
26977 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26984 /* Return the cost of moving data of mode M between a
26985 register and memory. A value of 2 is the default; this cost is
26986 relative to those in `REGISTER_MOVE_COST'.
26988 This function is used extensively by register_move_cost, which is used to
26989 build tables at startup. Make it inline in this case.
26990 When IN is 2, return the maximum of the in and out move costs.
26992 If moving between registers and memory is more expensive than
26993 between two registers, you should define this macro to express the relative cost.
26996 Also model the increased cost of moving QImode registers in non-Q_REGS classes. */
27000 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
27004 if (FLOAT_CLASS_P (regclass))
27022 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
27023 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
27025 if (SSE_CLASS_P (regclass))
27028 switch (GET_MODE_SIZE (mode))
27043 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
27044 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
27046 if (MMX_CLASS_P (regclass))
27049 switch (GET_MODE_SIZE (mode))
27061 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
27062 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
27064 switch (GET_MODE_SIZE (mode))
27067 if (Q_CLASS_P (regclass) || TARGET_64BIT)
27070 return ix86_cost->int_store[0];
27071 if (TARGET_PARTIAL_REG_DEPENDENCY
27072 && optimize_function_for_speed_p (cfun))
27073 cost = ix86_cost->movzbl_load;
27075 cost = ix86_cost->int_load[0];
27077 return MAX (cost, ix86_cost->int_store[0]);
27083 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
27085 return ix86_cost->movzbl_load;
27087 return ix86_cost->int_store[0] + 4;
27092 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
27093 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
27095 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
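/* Illustratively, on ia32 (UNITS_PER_WORD == 4) this prices a DImode
   access as two word moves and an XFmode access as three.  */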
27096 if (mode == TFmode)
27099 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
27101 cost = ix86_cost->int_load[2];
27103 cost = ix86_cost->int_store[2];
27104 return (cost * (((int) GET_MODE_SIZE (mode)
27105 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
27110 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
27113 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
27117 /* Return the cost of moving data from a register in class CLASS1 to
27118 one in class CLASS2.
27120 It is not required that the cost always equal 2 when FROM is the same as TO;
27121 on some machines it is expensive to move between registers if they are not
27122 general registers. */
27125 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
27126 reg_class_t class2_i)
27128 enum reg_class class1 = (enum reg_class) class1_i;
27129 enum reg_class class2 = (enum reg_class) class2_i;
27131 /* In case we require secondary memory, compute the cost of the store
27132 followed by the load. In order to avoid bad register allocation choices,
27133 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
27135 if (inline_secondary_memory_needed (class1, class2, mode, 0))
27139 cost += inline_memory_move_cost (mode, class1, 2);
27140 cost += inline_memory_move_cost (mode, class2, 2);
27142 /* When copying from a general-purpose register we may emit multiple
27143 stores followed by a single load, causing a memory-size-mismatch stall.
27144 Count this as an arbitrarily high cost of 20. */
27145 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
27148 /* In the case of FP/MMX moves, the registers actually overlap, and we
27149 have to switch modes in order to treat them differently. */
27150 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
27151 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
27157 /* Moves between SSE/MMX and integer unit are expensive. */
27158 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
27159 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
27161 /* ??? By keeping the returned value relatively high, we limit the number
27162 of moves between integer and MMX/SSE registers for all targets.
27163 Additionally, a high value prevents problems with x86_modes_tieable_p (),
27164 where integer modes in MMX/SSE registers are not tieable
27165 because of missing QImode and HImode moves to, from, or between
27166 MMX/SSE registers. */
27167 return MAX (8, ix86_cost->mmxsse_to_integer);
27169 if (MAYBE_FLOAT_CLASS_P (class1))
27170 return ix86_cost->fp_move;
27171 if (MAYBE_SSE_CLASS_P (class1))
27172 return ix86_cost->sse_move;
27173 if (MAYBE_MMX_CLASS_P (class1))
27174 return ix86_cost->mmx_move;
27178 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
27181 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
27183 /* The flags register, and only the flags register, can hold CCmode values. */
27184 if (CC_REGNO_P (regno))
27185 return GET_MODE_CLASS (mode) == MODE_CC;
27186 if (GET_MODE_CLASS (mode) == MODE_CC
27187 || GET_MODE_CLASS (mode) == MODE_RANDOM
27188 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
27190 if (FP_REGNO_P (regno))
27191 return VALID_FP_MODE_P (mode);
27192 if (SSE_REGNO_P (regno))
27194 /* We implement the move patterns for all vector modes into and
27195 out of SSE registers, even when no operation instructions
27196 are available. OImode move is available only when AVX is enabled. */
27198 return ((TARGET_AVX && mode == OImode)
27199 || VALID_AVX256_REG_MODE (mode)
27200 || VALID_SSE_REG_MODE (mode)
27201 || VALID_SSE2_REG_MODE (mode)
27202 || VALID_MMX_REG_MODE (mode)
27203 || VALID_MMX_REG_MODE_3DNOW (mode));
27205 if (MMX_REGNO_P (regno))
27207 /* We implement the move patterns for 3DNOW modes even in MMX mode,
27208 so if the register is available at all, then we can move data of
27209 the given mode into or out of it. */
27210 return (VALID_MMX_REG_MODE (mode)
27211 || VALID_MMX_REG_MODE_3DNOW (mode));
27214 if (mode == QImode)
27216 /* Take care with QImode values: they can live in non-QI regs,
27217 but then they cause partial-register stalls. */
27218 if (regno <= BX_REG || TARGET_64BIT)
27220 if (!TARGET_PARTIAL_REG_STALL)
27222 return reload_in_progress || reload_completed;
27224 /* We handle both integers and floats in the general purpose registers. */
27225 else if (VALID_INT_MODE_P (mode))
27227 else if (VALID_FP_MODE_P (mode))
27229 else if (VALID_DFP_MODE_P (mode))
27231 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
27232 on to use that value in smaller contexts, this can easily force a
27233 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
27234 supporting DImode, allow it. */
27235 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
27241 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
27242 tieable integer mode. */
27245 ix86_tieable_integer_mode_p (enum machine_mode mode)
27254 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
27257 return TARGET_64BIT;
27264 /* Return true if MODE1 is accessible in a register that can hold MODE2
27265 without copying. That is, all register classes that can hold MODE2
27266 can also hold MODE1. */
27269 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
27271 if (mode1 == mode2)
27274 if (ix86_tieable_integer_mode_p (mode1)
27275 && ix86_tieable_integer_mode_p (mode2))
27278 /* MODE2 being XFmode implies fp stack or general regs, which means we
27279 can tie any smaller floating point modes to it. Note that we do not
27280 tie this with TFmode. */
27281 if (mode2 == XFmode)
27282 return mode1 == SFmode || mode1 == DFmode;
27284 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
27285 that we can tie it with SFmode. */
27286 if (mode2 == DFmode)
27287 return mode1 == SFmode;
27289 /* If MODE2 is only appropriate for an SSE register, then tie with
27290 any other mode acceptable to SSE registers. */
27291 if (GET_MODE_SIZE (mode2) == 16
27292 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
27293 return (GET_MODE_SIZE (mode1) == 16
27294 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
27296 /* If MODE2 is appropriate for an MMX register, then tie
27297 with any other mode acceptable to MMX registers. */
27298 if (GET_MODE_SIZE (mode2) == 8
27299 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
27300 return (GET_MODE_SIZE (mode1) == 8
27301 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
27306 /* Compute a (partial) cost for rtx X. Return true if the complete
27307 cost has been computed, and false if subexpressions should be
27308 scanned. In either case, *TOTAL contains the cost result. */
27311 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
27313 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
27314 enum machine_mode mode = GET_MODE (x);
27315 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
27323 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
27325 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
      else if (flag_pic && SYMBOLIC_CONST (x)
	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
		       && (GET_CODE (x) != SYMBOL_REF
			   || !SYMBOL_REF_LOCAL_P (x)))))
27338 if (mode == VOIDmode)
27341 switch (standard_80387_constant_p (x))
27346 default: /* Other constants */
27351 /* Start with (MEM (SYMBOL_REF)), since that's where
27352 it'll probably end up. Add a penalty for size. */
27353 *total = (COSTS_N_INSNS (1)
27354 + (flag_pic != 0 && !TARGET_64BIT)
27355 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
27363 if (TARGET_64BIT && mode == DImode
27364 && GET_MODE (XEXP (x, 0)) == SImode)
27366 else if (TARGET_ZERO_EXTEND_WITH_AND)
27367 *total = cost->add;
27369 *total = cost->movzx;
27373 *total = cost->movsx;
27377 if (CONST_INT_P (XEXP (x, 1))
27378 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
27380 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
27383 *total = cost->add;
27386 if ((value == 2 || value == 3)
27387 && cost->lea <= cost->shift_const)
27389 *total = cost->lea;
27399 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
27401 if (CONST_INT_P (XEXP (x, 1)))
27403 if (INTVAL (XEXP (x, 1)) > 32)
27404 *total = cost->shift_const + COSTS_N_INSNS (2);
27406 *total = cost->shift_const * 2;
27410 if (GET_CODE (XEXP (x, 1)) == AND)
27411 *total = cost->shift_var * 2;
27413 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
27418 if (CONST_INT_P (XEXP (x, 1)))
27419 *total = cost->shift_const;
27421 *total = cost->shift_var;
27426 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27428 /* ??? SSE scalar cost should be used here. */
27429 *total = cost->fmul;
27432 else if (X87_FLOAT_MODE_P (mode))
27434 *total = cost->fmul;
27437 else if (FLOAT_MODE_P (mode))
27439 /* ??? SSE vector cost should be used here. */
27440 *total = cost->fmul;
27445 rtx op0 = XEXP (x, 0);
27446 rtx op1 = XEXP (x, 1);
27448 if (CONST_INT_P (XEXP (x, 1)))
27450 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
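	    /* The loop below counts the bits set in VALUE: since
	       value &= value - 1 clears the lowest set bit, the body
	       runs once per set bit (Kernighan's method).  */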
	    for (nbits = 0; value != 0; value &= value - 1)
	      nbits++;
27455 /* This is arbitrary. */
27458 /* Compute costs correctly for widening multiplication. */
27459 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
27460 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
27461 == GET_MODE_SIZE (mode))
27463 int is_mulwiden = 0;
27464 enum machine_mode inner_mode = GET_MODE (op0);
27466 if (GET_CODE (op0) == GET_CODE (op1))
27467 is_mulwiden = 1, op1 = XEXP (op1, 0);
27468 else if (CONST_INT_P (op1))
27470 if (GET_CODE (op0) == SIGN_EXTEND)
27471 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
27474 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
27478 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
27481 *total = (cost->mult_init[MODE_INDEX (mode)]
27482 + nbits * cost->mult_bit
27483 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
27492 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27493 /* ??? SSE cost should be used here. */
27494 *total = cost->fdiv;
27495 else if (X87_FLOAT_MODE_P (mode))
27496 *total = cost->fdiv;
27497 else if (FLOAT_MODE_P (mode))
27498 /* ??? SSE vector cost should be used here. */
27499 *total = cost->fdiv;
27501 *total = cost->divide[MODE_INDEX (mode)];
27505 if (GET_MODE_CLASS (mode) == MODE_INT
27506 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
27508 if (GET_CODE (XEXP (x, 0)) == PLUS
27509 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
27510 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
27511 && CONSTANT_P (XEXP (x, 1)))
27513 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
27514 if (val == 2 || val == 4 || val == 8)
27516 *total = cost->lea;
27517 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27518 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
27519 outer_code, speed);
27520 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27524 else if (GET_CODE (XEXP (x, 0)) == MULT
27525 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
27527 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
27528 if (val == 2 || val == 4 || val == 8)
27530 *total = cost->lea;
27531 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27532 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27536 else if (GET_CODE (XEXP (x, 0)) == PLUS)
27538 *total = cost->lea;
27539 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
27540 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
27541 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
27548 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27550 /* ??? SSE cost should be used here. */
27551 *total = cost->fadd;
27554 else if (X87_FLOAT_MODE_P (mode))
27556 *total = cost->fadd;
27559 else if (FLOAT_MODE_P (mode))
27561 /* ??? SSE vector cost should be used here. */
27562 *total = cost->fadd;
27570 if (!TARGET_64BIT && mode == DImode)
27572 *total = (cost->add * 2
27573 + (rtx_cost (XEXP (x, 0), outer_code, speed)
27574 << (GET_MODE (XEXP (x, 0)) != DImode))
27575 + (rtx_cost (XEXP (x, 1), outer_code, speed)
27576 << (GET_MODE (XEXP (x, 1)) != DImode)));
27582 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27584 /* ??? SSE cost should be used here. */
27585 *total = cost->fchs;
27588 else if (X87_FLOAT_MODE_P (mode))
27590 *total = cost->fchs;
27593 else if (FLOAT_MODE_P (mode))
27595 /* ??? SSE vector cost should be used here. */
27596 *total = cost->fchs;
27602 if (!TARGET_64BIT && mode == DImode)
27603 *total = cost->add * 2;
27605 *total = cost->add;
27609 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
27610 && XEXP (XEXP (x, 0), 1) == const1_rtx
27611 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
27612 && XEXP (x, 1) == const0_rtx)
27614 /* This kind of construct is implemented using test[bwl].
27615 Treat it as if we had an AND. */
27616 *total = (cost->add
27617 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
27618 + rtx_cost (const1_rtx, outer_code, speed));
27624 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
27629 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27630 /* ??? SSE cost should be used here. */
27631 *total = cost->fabs;
27632 else if (X87_FLOAT_MODE_P (mode))
27633 *total = cost->fabs;
27634 else if (FLOAT_MODE_P (mode))
27635 /* ??? SSE vector cost should be used here. */
27636 *total = cost->fabs;
27640 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27641 /* ??? SSE cost should be used here. */
27642 *total = cost->fsqrt;
27643 else if (X87_FLOAT_MODE_P (mode))
27644 *total = cost->fsqrt;
27645 else if (FLOAT_MODE_P (mode))
27646 /* ??? SSE vector cost should be used here. */
27647 *total = cost->fsqrt;
27651 if (XINT (x, 1) == UNSPEC_TP)
27658 case VEC_DUPLICATE:
      /* ??? Assume all of these vector manipulation patterns are
	 recognizable.  In which case they all pretty much have the
	 same cost.  */
      *total = COSTS_N_INSNS (1);
27672 static int current_machopic_label_num;
27674 /* Given a symbol name and its associated stub, write out the
27675 definition of the stub. */
27678 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27680 unsigned int length;
27681 char *binder_name, *symbol_name, lazy_ptr_name[32];
27682 int label = ++current_machopic_label_num;
27684 /* For 64-bit we shouldn't get here. */
27685 gcc_assert (!TARGET_64BIT);
27687 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27688 symb = targetm.strip_name_encoding (symb);
27690 length = strlen (stub);
27691 binder_name = XALLOCAVEC (char, length + 32);
27692 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27694 length = strlen (symb);
27695 symbol_name = XALLOCAVEC (char, length + 32);
27696 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27698 sprintf (lazy_ptr_name, "L%d$lz", label);
  if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
      fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%edx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fputs ("\tpushl\t%eax\n", file);
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27729 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27730 fprintf (file, "%s:\n", lazy_ptr_name);
27731 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27732 fprintf (file, ASM_LONG "%s\n", binder_name);
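  /* For reference, with MACHOPIC_PURE the code above emits roughly the
     following (all label names are illustrative only):

	Lfoo$stub:
		.indirect_symbol _foo
		call	LPC$1
	LPC$1:	popl	%eax
		movl	L1$lz-LPC$1(%eax),%edx
		jmp	*%edx
	Lfoo$stub$binder:
		lea	L1$lz-LPC$1(%eax),%eax
		pushl	%eax
		jmp	dyld_stub_binding_helper

     and, in the lazy symbol pointer section:

	L1$lz:	.indirect_symbol _foo
		.long	Lfoo$stub$binder  */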
27734 #endif /* TARGET_MACHO */
/* Order the registers for the register allocator.  */
27739 x86_order_regs_for_local_alloc (void)
27744 /* First allocate the local general purpose registers. */
27745 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27746 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27747 reg_alloc_order [pos++] = i;
27749 /* Global general purpose registers. */
27750 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27751 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27752 reg_alloc_order [pos++] = i;
  /* x87 registers come first in case we are doing FP math
     using them.  */
27756 if (!TARGET_SSE_MATH)
27757 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27758 reg_alloc_order [pos++] = i;
27760 /* SSE registers. */
27761 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27762 reg_alloc_order [pos++] = i;
27763 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27764 reg_alloc_order [pos++] = i;
27766 /* x87 registers. */
27767 if (TARGET_SSE_MATH)
27768 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27769 reg_alloc_order [pos++] = i;
27771 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27772 reg_alloc_order [pos++] = i;
  /* Initialize the rest of the array, as we do not allocate some registers
     at all.  */
27776 while (pos < FIRST_PSEUDO_REGISTER)
27777 reg_alloc_order [pos++] = 0;
/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
27781 struct attribute_spec.handler. */
27783 ix86_handle_abi_attribute (tree *node, tree name,
27784 tree args ATTRIBUTE_UNUSED,
27785 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27787 if (TREE_CODE (*node) != FUNCTION_TYPE
27788 && TREE_CODE (*node) != METHOD_TYPE
27789 && TREE_CODE (*node) != FIELD_DECL
27790 && TREE_CODE (*node) != TYPE_DECL)
27792 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27794 *no_add_attrs = true;
27799 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27801 *no_add_attrs = true;
  /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
27806 if (is_attribute_p ("ms_abi", name))
27808 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27810 error ("ms_abi and sysv_abi attributes are not compatible");
27815 else if (is_attribute_p ("sysv_abi", name))
27817 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27819 error ("ms_abi and sysv_abi attributes are not compatible");
27828 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27829 struct attribute_spec.handler. */
27831 ix86_handle_struct_attribute (tree *node, tree name,
27832 tree args ATTRIBUTE_UNUSED,
27833 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27836 if (DECL_P (*node))
27838 if (TREE_CODE (*node) == TYPE_DECL)
27839 type = &TREE_TYPE (*node);
27844 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27845 || TREE_CODE (*type) == UNION_TYPE)))
27847 warning (OPT_Wattributes, "%qE attribute ignored",
27849 *no_add_attrs = true;
27852 else if ((is_attribute_p ("ms_struct", name)
27853 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27854 || ((is_attribute_p ("gcc_struct", name)
27855 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27857 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27859 *no_add_attrs = true;
27866 ix86_handle_fndecl_attribute (tree *node, tree name,
27867 tree args ATTRIBUTE_UNUSED,
27868 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27870 if (TREE_CODE (*node) != FUNCTION_DECL)
27872 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27874 *no_add_attrs = true;
27880 ix86_ms_bitfield_layout_p (const_tree record_type)
27882 return ((TARGET_MS_BITFIELD_LAYOUT
27883 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27884 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27887 /* Returns an expression indicating where the this parameter is
27888 located on entry to the FUNCTION. */
27891 x86_this_parameter (tree function)
27893 tree type = TREE_TYPE (function);
27894 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27899 const int *parm_regs;
27901 if (ix86_function_type_abi (type) == MS_ABI)
27902 parm_regs = x86_64_ms_abi_int_parameter_registers;
27904 parm_regs = x86_64_int_parameter_registers;
27905 return gen_rtx_REG (DImode, parm_regs[aggr]);
27908 nregs = ix86_function_regparm (type, function);
27910 if (nregs > 0 && !stdarg_p (type))
27914 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27915 regno = aggr ? DX_REG : CX_REG;
27916 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27920 return gen_rtx_MEM (SImode,
27921 plus_constant (stack_pointer_rtx, 4));
27930 return gen_rtx_MEM (SImode,
27931 plus_constant (stack_pointer_rtx, 4));
27934 return gen_rtx_REG (SImode, regno);
27937 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
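  /* So, for example, a plain 32-bit stack-args method finds `this' at
     4(%esp) on entry, or at 8(%esp) when a hidden pointer for an
     aggregate return value is pushed first.  */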
27940 /* Determine whether x86_output_mi_thunk can succeed. */
27943 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27944 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27945 HOST_WIDE_INT vcall_offset, const_tree function)
27947 /* 64-bit can handle anything. */
27951 /* For 32-bit, everything's fine if we have one free register. */
27952 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27955 /* Need a free register for vcall_offset. */
27959 /* Need a free register for GOT references. */
27960 if (flag_pic && !targetm.binds_local_p (function))
27963 /* Otherwise ok. */
27967 /* Output the assembler code for a thunk function. THUNK_DECL is the
27968 declaration for the thunk function itself, FUNCTION is the decl for
27969 the target function. DELTA is an immediate constant offset to be
27970 added to THIS. If VCALL_OFFSET is nonzero, the word at
27971 *(*this + vcall_offset) should be added to THIS. */
27974 x86_output_mi_thunk (FILE *file,
27975 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27976 HOST_WIDE_INT vcall_offset, tree function)
27979 rtx this_param = x86_this_parameter (function);
27982 /* Make sure unwind info is emitted for the thunk if needed. */
27983 final_start_function (emit_barrier (), file, 1);
27985 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27986 pull it in now and let DELTA benefit. */
27987 if (REG_P (this_param))
27988 this_reg = this_param;
27989 else if (vcall_offset)
27991 /* Put the this parameter into %eax. */
27992 xops[0] = this_param;
27993 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27994 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27997 this_reg = NULL_RTX;
27999 /* Adjust the this parameter by a fixed constant. */
28002 xops[0] = GEN_INT (delta);
28003 xops[1] = this_reg ? this_reg : this_param;
28006 if (!x86_64_general_operand (xops[0], DImode))
28008 tmp = gen_rtx_REG (DImode, R10_REG);
28010 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
28012 xops[1] = this_param;
28014 if (x86_maybe_negate_const_int (&xops[0], DImode))
28015 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
28017 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
28019 else if (x86_maybe_negate_const_int (&xops[0], SImode))
28020 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
28022 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
28025 /* Adjust the this parameter by a value stored in the vtable. */
28029 tmp = gen_rtx_REG (DImode, R10_REG);
28032 int tmp_regno = CX_REG;
28033 if (lookup_attribute ("fastcall",
28034 TYPE_ATTRIBUTES (TREE_TYPE (function)))
28035 || lookup_attribute ("thiscall",
28036 TYPE_ATTRIBUTES (TREE_TYPE (function))))
28037 tmp_regno = AX_REG;
28038 tmp = gen_rtx_REG (SImode, tmp_regno);
28041 xops[0] = gen_rtx_MEM (Pmode, this_reg);
28043 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28045 /* Adjust the this parameter. */
28046 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
28047 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
28049 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
28050 xops[0] = GEN_INT (vcall_offset);
28052 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
28053 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
28055 xops[1] = this_reg;
28056 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
28059 /* If necessary, drop THIS back to its stack slot. */
28060 if (this_reg && this_reg != this_param)
28062 xops[0] = this_reg;
28063 xops[1] = this_param;
28064 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
28067 xops[0] = XEXP (DECL_RTL (function), 0);
28070 if (!flag_pic || targetm.binds_local_p (function))
28071 output_asm_insn ("jmp\t%P0", xops);
28072 /* All thunks should be in the same object as their target,
28073 and thus binds_local_p should be true. */
28074 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
28075 gcc_unreachable ();
28078 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
28079 tmp = gen_rtx_CONST (Pmode, tmp);
28080 tmp = gen_rtx_MEM (QImode, tmp);
28082 output_asm_insn ("jmp\t%A0", xops);
28087 if (!flag_pic || targetm.binds_local_p (function))
28088 output_asm_insn ("jmp\t%P0", xops);
28093 rtx sym_ref = XEXP (DECL_RTL (function), 0);
28094 if (TARGET_MACHO_BRANCH_ISLANDS)
28095 sym_ref = (gen_rtx_SYMBOL_REF
28097 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
28098 tmp = gen_rtx_MEM (QImode, sym_ref);
28100 output_asm_insn ("jmp\t%0", xops);
28103 #endif /* TARGET_MACHO */
28105 tmp = gen_rtx_REG (SImode, CX_REG);
28106 output_set_got (tmp, NULL_RTX);
28109 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
28110 output_asm_insn ("jmp\t{*}%1", xops);
28113 final_end_function ();
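  /* For illustration only: a typical 32-bit thunk with a small DELTA
     and no VCALL_OFFSET reduces to just

	addl	$DELTA, 4(%esp)
	jmp	function

     i.e. adjust `this' in place in its stack slot, then tail-call the
     real function.  */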
28117 x86_file_start (void)
28119 default_file_start ();
28121 darwin_file_start ();
28123 if (X86_FILE_START_VERSION_DIRECTIVE)
28124 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
28125 if (X86_FILE_START_FLTUSED)
28126 fputs ("\t.global\t__fltused\n", asm_out_file);
28127 if (ix86_asm_dialect == ASM_INTEL)
28128 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
28132 x86_field_alignment (tree field, int computed)
28134 enum machine_mode mode;
28135 tree type = TREE_TYPE (field);
28137 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
28139 mode = TYPE_MODE (strip_array_types (type));
28140 if (mode == DFmode || mode == DCmode
28141 || GET_MODE_CLASS (mode) == MODE_INT
28142 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
28143 return MIN (32, computed);
28147 /* Output assembler code to FILE to increment profiler label # LABELNO
28148 for profiling a function entry. */
28150 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
28152 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
28157 #ifndef NO_PROFILE_COUNTERS
28158 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
28161 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
28162 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
28164 fprintf (file, "\tcall\t%s\n", mcount_name);
28168 #ifndef NO_PROFILE_COUNTERS
28169 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
28172 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
28176 #ifndef NO_PROFILE_COUNTERS
28177 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
28180 fprintf (file, "\tcall\t%s\n", mcount_name);
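      /* E.g. for 32-bit non-PIC code the sequence emitted above is roughly

	    movl	$LP<labelno>, %edx	(omitted if NO_PROFILE_COUNTERS)
	    call	mcount

	 where %edx stands in for PROFILE_COUNT_REGISTER and the exact
	 mcount symbol name is target-dependent.  */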
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */
28190 min_insn_size (rtx insn)
28194 if (!INSN_P (insn) || !active_insn_p (insn))
  /* Discard the alignment directives we've emitted, and jump table data.  */
28198 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
28199 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
28201 if (JUMP_TABLE_DATA_P (insn))
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
28207 && symbolic_reference_mentioned_p (PATTERN (insn))
28208 && !SIBLING_CALL_P (insn))
28210 len = get_attr_length (insn);
28214 /* For normal instructions we rely on get_attr_length being exact,
28215 with a few exceptions. */
28216 if (!JUMP_P (insn))
28218 enum attr_type type = get_attr_type (insn);
28223 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
28224 || asm_noperands (PATTERN (insn)) >= 0)
28231 /* Otherwise trust get_attr_length. */
28235 l = get_attr_length_address (insn);
28236 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
28245 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
   window.  */
28251 ix86_avoid_jump_mispredicts (void)
28253 rtx insn, start = get_insns ();
28254 int nbytes = 0, njumps = 0;
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN and not
     including START.  When NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN end up in the same 16-byte window.

     The smallest offset in the window at which INSN can start is the case
     where START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
     maxskip 15 - NBYTES + sizeof (INSN).  */
28267 for (insn = start; insn; insn = NEXT_INSN (insn))
28271 if (LABEL_P (insn))
28273 int align = label_to_alignment (insn);
28274 int max_skip = label_to_max_skip (insn);
28278 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
28279 already in the current 16 byte page, because otherwise
28280 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
28281 bytes to reach 16 byte boundary. */
28283 || (align <= 3 && max_skip != (1 << align) - 1))
28286 fprintf (dump_file, "Label %i with max_skip %i\n",
28287 INSN_UID (insn), max_skip);
28290 while (nbytes + max_skip >= 16)
28292 start = NEXT_INSN (start);
28293 if ((JUMP_P (start)
28294 && GET_CODE (PATTERN (start)) != ADDR_VEC
28295 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28297 njumps--, isjump = 1;
28300 nbytes -= min_insn_size (start);
28306 min_size = min_insn_size (insn);
28307 nbytes += min_size;
28309 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
28310 INSN_UID (insn), min_size);
28312 && GET_CODE (PATTERN (insn)) != ADDR_VEC
28313 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
28321 start = NEXT_INSN (start);
28322 if ((JUMP_P (start)
28323 && GET_CODE (PATTERN (start)) != ADDR_VEC
28324 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
28326 njumps--, isjump = 1;
28329 nbytes -= min_insn_size (start);
28331 gcc_assert (njumps >= 0);
28333 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
28334 INSN_UID (start), INSN_UID (insn), nbytes);
28336 if (njumps == 3 && isjump && nbytes < 16)
28338 int padsize = 15 - nbytes + min_insn_size (insn);
28341 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
28342 INSN_UID (insn), padsize);
28343 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
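	  /* Worked example: if the three previous jumps plus the insns
	     between them occupy NBYTES == 12 bytes and INSN itself is
	     2 bytes, we pad by 15 - 12 + 2 = 5 bytes, which guarantees
	     the four jumps cannot all land in one 16-byte window.  */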
/* AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly preceded
   by another jump instruction.  We avoid the penalty by inserting a NOP just
   before the RET instructions in such cases.  */
28354 ix86_pad_returns (void)
28359 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28361 basic_block bb = e->src;
28362 rtx ret = BB_END (bb);
28364 bool replace = false;
28366 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
28367 || optimize_bb_for_size_p (bb))
28369 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
28370 if (active_insn_p (prev) || LABEL_P (prev))
28372 if (prev && LABEL_P (prev))
28377 FOR_EACH_EDGE (e, ei, bb->preds)
28378 if (EDGE_FREQUENCY (e) && e->src->index >= 0
28379 && !(e->flags & EDGE_FALLTHRU))
28384 prev = prev_active_insn (ret);
28386 && ((JUMP_P (prev) && any_condjump_p (prev))
      /* Empty functions get a branch mispredict even when the jump destination
	 is not visible to us.  */
28391 if (!prev && !optimize_function_for_size_p (cfun))
28396 emit_jump_insn_before (gen_return_internal_long (), ret);
28402 /* Count the minimum number of instructions in BB. Return 4 if the
28403 number of instructions >= 4. */
28406 ix86_count_insn_bb (basic_block bb)
28409 int insn_count = 0;
28411 /* Count number of instructions in this block. Return 4 if the number
28412 of instructions >= 4. */
28413 FOR_BB_INSNS (bb, insn)
      /* This only happens in exit blocks.  */
28417 && GET_CODE (PATTERN (insn)) == RETURN)
28420 if (NONDEBUG_INSN_P (insn)
28421 && GET_CODE (PATTERN (insn)) != USE
28422 && GET_CODE (PATTERN (insn)) != CLOBBER)
28425 if (insn_count >= 4)
/* Count the minimum number of instructions in a code path ending in BB.
   Return 4 if the number of instructions >= 4.  */
28438 ix86_count_insn (basic_block bb)
28442 int min_prev_count;
28444 /* Only bother counting instructions along paths with no
28445 more than 2 basic blocks between entry and exit. Given
28446 that BB has an edge to exit, determine if a predecessor
28447 of BB has an edge from entry. If so, compute the number
28448 of instructions in the predecessor block. If there
28449 happen to be multiple such blocks, compute the minimum. */
28450 min_prev_count = 4;
28451 FOR_EACH_EDGE (e, ei, bb->preds)
28454 edge_iterator prev_ei;
28456 if (e->src == ENTRY_BLOCK_PTR)
28458 min_prev_count = 0;
28461 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
28463 if (prev_e->src == ENTRY_BLOCK_PTR)
28465 int count = ix86_count_insn_bb (e->src);
28466 if (count < min_prev_count)
28467 min_prev_count = count;
28473 if (min_prev_count < 4)
28474 min_prev_count += ix86_count_insn_bb (bb);
28476 return min_prev_count;
/* Pad short functions to 4 instructions.  */
28482 ix86_pad_short_function (void)
28487 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
28489 rtx ret = BB_END (e->src);
28490 if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN)
28492 int insn_count = ix86_count_insn (e->src);
28494 /* Pad short function. */
28495 if (insn_count < 4)
28499 /* Find epilogue. */
28502 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
28503 insn = PREV_INSN (insn);
28508 /* Two NOPs are counted as one instruction. */
28509 insn_count = 2 * (4 - insn_count);
28510 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
28521 if (optimize && optimize_function_for_speed_p (cfun))
28523 if (TARGET_PAD_SHORT_FUNCTION)
28524 ix86_pad_short_function ();
28525 else if (TARGET_PAD_RETURNS)
28526 ix86_pad_returns ();
28527 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
28528 if (TARGET_FOUR_JUMP_LIMIT)
28529 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
28537 x86_extended_QIreg_mentioned_p (rtx insn)
28540 extract_insn_cached (insn);
28541 for (i = 0; i < recog_data.n_operands; i++)
28542 if (REG_P (recog_data.operand[i])
28543 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
28551 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
28553 unsigned int regno;
28556 regno = REGNO (*p);
28557 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions a register that must be encoded using a REX
   prefix.  */
28563 x86_extended_reg_mentioned_p (rtx insn)
28565 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
28566 extended_reg_mentioned_1, NULL);
28569 /* If profitable, negate (without causing overflow) integer constant
28570 of mode MODE at location LOC. Return true in this case. */
28572 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
28576 if (!CONST_INT_P (*loc))
28582 /* DImode x86_64 constants must fit in 32 bits. */
28583 gcc_assert (x86_64_immediate_operand (*loc, mode));
28594 gcc_unreachable ();
28597 /* Avoid overflows. */
28598 if (mode_signbit_p (mode, *loc))
28601 val = INTVAL (*loc);
28603 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
28604 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
28605 if ((val < 0 && val != -128)
28608 *loc = GEN_INT (-val);
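  /* Example: `addl $-4, %eax' becomes `subl $4, %eax', which reads
     better.  The -128 exception exists because the sign-extended imm8
     range is -128..127: `addl $-128, %eax' still fits in an imm8
     encoding, while `subl $128, %eax' would need an imm32.  */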
28615 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
28616 optabs would emit if we didn't have TFmode patterns. */
28619 x86_emit_floatuns (rtx operands[2])
28621 rtx neglab, donelab, i0, i1, f0, in, out;
28622 enum machine_mode mode, inmode;
28624 inmode = GET_MODE (operands[1]);
28625 gcc_assert (inmode == SImode || inmode == DImode);
28628 in = force_reg (inmode, operands[1]);
28629 mode = GET_MODE (out);
28630 neglab = gen_label_rtx ();
28631 donelab = gen_label_rtx ();
28632 f0 = gen_reg_rtx (mode);
28634 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
28636 expand_float (out, in, 0);
28638 emit_jump_insn (gen_jump (donelab));
28641 emit_label (neglab);
28643 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
28645 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
28647 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
28649 expand_float (f0, i0, 0);
28651 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
28653 emit_label (donelab);
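  /* The negative path above uses the classic halving trick: for an input
     U with the sign bit set, (U >> 1) | (U & 1) halves the value while
     keeping the discarded bit sticky, the signed conversion of that is
     representable, and F0 + F0 then reconstructs (FP) U with correct
     rounding.  */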
28656 /* AVX does not support 32-byte integer vector operations,
28657 thus the longest vector we are faced with is V16QImode. */
28658 #define MAX_VECT_LEN 16
28660 struct expand_vec_perm_d
28662 rtx target, op0, op1;
28663 unsigned char perm[MAX_VECT_LEN];
28664 enum machine_mode vmode;
28665 unsigned char nelt;
28669 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
28670 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
28672 /* Get a vector mode of the same size as the original but with elements
28673 twice as wide. This is only guaranteed to apply to integral vectors. */
28675 static inline enum machine_mode
28676 get_mode_wider_vector (enum machine_mode o)
28678 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
28679 enum machine_mode n = GET_MODE_WIDER_MODE (o);
28680 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
28681 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
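  /* E.g. V16QImode -> V8HImode and V8HImode -> V4SImode: the same
     total size, half as many elements, each twice as wide.  */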
28685 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28686 with all elements equal to VAR. Return true if successful. */
28689 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
28690 rtx target, rtx val)
28713 /* First attempt to recognize VAL as-is. */
28714 dup = gen_rtx_VEC_DUPLICATE (mode, val);
28715 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
28716 if (recog_memoized (insn) < 0)
28719 /* If that fails, force VAL into a register. */
28722 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
28723 seq = get_insns ();
28726 emit_insn_before (seq, insn);
28728 ok = recog_memoized (insn) >= 0;
28737 if (TARGET_SSE || TARGET_3DNOW_A)
28741 val = gen_lowpart (SImode, val);
28742 x = gen_rtx_TRUNCATE (HImode, val);
28743 x = gen_rtx_VEC_DUPLICATE (mode, x);
28744 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28757 struct expand_vec_perm_d dperm;
28761 memset (&dperm, 0, sizeof (dperm));
28762 dperm.target = target;
28763 dperm.vmode = mode;
28764 dperm.nelt = GET_MODE_NUNITS (mode);
28765 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28767 /* Extend to SImode using a paradoxical SUBREG. */
28768 tmp1 = gen_reg_rtx (SImode);
28769 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28771 /* Insert the SImode value as low element of a V4SImode vector. */
28772 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28773 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28775 ok = (expand_vec_perm_1 (&dperm)
28776 || expand_vec_perm_broadcast_1 (&dperm));
28788 /* Replicate the value once into the next wider mode and recurse. */
28790 enum machine_mode smode, wsmode, wvmode;
28793 smode = GET_MODE_INNER (mode);
28794 wvmode = get_mode_wider_vector (mode);
28795 wsmode = GET_MODE_INNER (wvmode);
28797 val = convert_modes (wsmode, smode, val, true);
28798 x = expand_simple_binop (wsmode, ASHIFT, val,
28799 GEN_INT (GET_MODE_BITSIZE (smode)),
28800 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28801 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28803 x = gen_lowpart (wvmode, target);
28804 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
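	/* E.g. to splat a QImode value V across V16QImode: form the
	   HImode value (V << 8) | V above, then splat that across
	   V8HImode, which is the same 16 bytes reinterpreted.  */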
28812 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28813 rtx x = gen_reg_rtx (hvmode);
28815 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28818 x = gen_rtx_VEC_CONCAT (mode, x, x);
28819 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28828 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
   whose ONE_VAR element is VAR, and other elements are zero.  Return true
   if successful.  */
28833 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28834 rtx target, rtx var, int one_var)
28836 enum machine_mode vsimode;
28839 bool use_vector_set = false;
      /* For SSE4.1, we normally use vector set.  But if the second
	 element is zero and inter-unit moves are OK, we use movq
	 instead.  */
28847 use_vector_set = (TARGET_64BIT
28849 && !(TARGET_INTER_UNIT_MOVES
28855 use_vector_set = TARGET_SSE4_1;
28858 use_vector_set = TARGET_SSE2;
28861 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28868 use_vector_set = TARGET_AVX;
28871 /* Use ix86_expand_vector_set in 64bit mode only. */
28872 use_vector_set = TARGET_AVX && TARGET_64BIT;
28878 if (use_vector_set)
28880 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28881 var = force_reg (GET_MODE_INNER (mode), var);
28882 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28898 var = force_reg (GET_MODE_INNER (mode), var);
28899 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28900 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28905 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28906 new_target = gen_reg_rtx (mode);
28908 new_target = target;
28909 var = force_reg (GET_MODE_INNER (mode), var);
28910 x = gen_rtx_VEC_DUPLICATE (mode, var);
28911 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28912 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28915 /* We need to shuffle the value to the correct position, so
28916 create a new pseudo to store the intermediate result. */
28918 /* With SSE2, we can use the integer shuffle insns. */
28919 if (mode != V4SFmode && TARGET_SSE2)
28921 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28923 GEN_INT (one_var == 1 ? 0 : 1),
28924 GEN_INT (one_var == 2 ? 0 : 1),
28925 GEN_INT (one_var == 3 ? 0 : 1)));
28926 if (target != new_target)
28927 emit_move_insn (target, new_target);
28931 /* Otherwise convert the intermediate result to V4SFmode and
28932 use the SSE1 shuffle instructions. */
28933 if (mode != V4SFmode)
28935 tmp = gen_reg_rtx (V4SFmode);
28936 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28941 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28943 GEN_INT (one_var == 1 ? 0 : 1),
28944 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28945 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28947 if (mode != V4SFmode)
28948 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28949 else if (tmp != target)
28950 emit_move_insn (target, tmp);
28952 else if (target != new_target)
28953 emit_move_insn (target, new_target);
28958 vsimode = V4SImode;
28964 vsimode = V2SImode;
28970 /* Zero extend the variable element to SImode and recurse. */
28971 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28973 x = gen_reg_rtx (vsimode);
28974 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28976 gcc_unreachable ();
28978 emit_move_insn (target, gen_lowpart (mode, x));
28986 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28987 consisting of the values in VALS. It is known that all elements
28988 except ONE_VAR are constants. Return true if successful. */
28991 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28992 rtx target, rtx vals, int one_var)
28994 rtx var = XVECEXP (vals, 0, one_var);
28995 enum machine_mode wmode;
28998 const_vec = copy_rtx (vals);
28999 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
29000 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
29008 /* For the two element vectors, it's just as easy to use
29009 the general case. */
29013 /* Use ix86_expand_vector_set in 64bit mode only. */
29036 /* There's no way to set one QImode entry easily. Combine
29037 the variable value with its adjacent constant value, and
29038 promote to an HImode set. */
29039 x = XVECEXP (vals, 0, one_var ^ 1);
29042 var = convert_modes (HImode, QImode, var, true);
29043 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
29044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
29045 x = GEN_INT (INTVAL (x) & 0xff);
29049 var = convert_modes (HImode, QImode, var, true);
29050 x = gen_int_mode (INTVAL (x) << 8, HImode);
29052 if (x != const0_rtx)
29053 var = expand_simple_binop (HImode, IOR, var, x, var,
29054 1, OPTAB_LIB_WIDEN);
29056 x = gen_reg_rtx (wmode);
29057 emit_move_insn (x, gen_lowpart (wmode, const_vec));
29058 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
29060 emit_move_insn (target, gen_lowpart (mode, x));
29067 emit_move_insn (target, const_vec);
29068 ix86_expand_vector_set (mmx_ok, target, var, one_var);
29072 /* A subroutine of ix86_expand_vector_init_general. Use vector
29073 concatenate to handle the most general case: all values variable,
29074 and none identical. */
29077 ix86_expand_vector_init_concat (enum machine_mode mode,
29078 rtx target, rtx *ops, int n)
29080 enum machine_mode cmode, hmode = VOIDmode;
29081 rtx first[8], second[4];
29121 gcc_unreachable ();
29124 if (!register_operand (ops[1], cmode))
29125 ops[1] = force_reg (cmode, ops[1]);
29126 if (!register_operand (ops[0], cmode))
29127 ops[0] = force_reg (cmode, ops[0]);
29128 emit_insn (gen_rtx_SET (VOIDmode, target,
29129 gen_rtx_VEC_CONCAT (mode, ops[0],
29149 gcc_unreachable ();
29165 gcc_unreachable ();
29170 /* FIXME: We process inputs backward to help RA. PR 36222. */
29173 for (; i > 0; i -= 2, j--)
29175 first[j] = gen_reg_rtx (cmode);
29176 v = gen_rtvec (2, ops[i - 1], ops[i]);
29177 ix86_expand_vector_init (false, first[j],
29178 gen_rtx_PARALLEL (cmode, v));
29184 gcc_assert (hmode != VOIDmode);
29185 for (i = j = 0; i < n; i += 2, j++)
29187 second[j] = gen_reg_rtx (hmode);
29188 ix86_expand_vector_init_concat (hmode, second [j],
29192 ix86_expand_vector_init_concat (mode, target, second, n);
29195 ix86_expand_vector_init_concat (mode, target, first, n);
29199 gcc_unreachable ();
29203 /* A subroutine of ix86_expand_vector_init_general. Use vector
29204 interleave to handle the most general case: all values variable,
29205 and none identical. */
29208 ix86_expand_vector_init_interleave (enum machine_mode mode,
29209 rtx target, rtx *ops, int n)
29211 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
29214 rtx (*gen_load_even) (rtx, rtx, rtx);
29215 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
29216 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
29221 gen_load_even = gen_vec_setv8hi;
29222 gen_interleave_first_low = gen_vec_interleave_lowv4si;
29223 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29224 inner_mode = HImode;
29225 first_imode = V4SImode;
29226 second_imode = V2DImode;
29227 third_imode = VOIDmode;
29230 gen_load_even = gen_vec_setv16qi;
29231 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
29232 gen_interleave_second_low = gen_vec_interleave_lowv4si;
29233 inner_mode = QImode;
29234 first_imode = V8HImode;
29235 second_imode = V4SImode;
29236 third_imode = V2DImode;
29239 gcc_unreachable ();
29242 for (i = 0; i < n; i++)
      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
29245 op0 = gen_reg_rtx (SImode);
29246 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
29248 /* Insert the SImode value as low element of V4SImode vector. */
29249 op1 = gen_reg_rtx (V4SImode);
29250 op0 = gen_rtx_VEC_MERGE (V4SImode,
29251 gen_rtx_VEC_DUPLICATE (V4SImode,
29253 CONST0_RTX (V4SImode),
29255 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
      /* Cast the V4SImode vector back to a vector in the original mode.  */
29258 op0 = gen_reg_rtx (mode);
29259 emit_move_insn (op0, gen_lowpart (mode, op1));
      /* Load the even elements into the second position.  */
29262 emit_insn (gen_load_even (op0,
29263 force_reg (inner_mode,
29267 /* Cast vector to FIRST_IMODE vector. */
29268 ops[i] = gen_reg_rtx (first_imode);
29269 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
29272 /* Interleave low FIRST_IMODE vectors. */
29273 for (i = j = 0; i < n; i += 2, j++)
29275 op0 = gen_reg_rtx (first_imode);
29276 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
29278 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
29279 ops[j] = gen_reg_rtx (second_imode);
29280 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
29283 /* Interleave low SECOND_IMODE vectors. */
29284 switch (second_imode)
29287 for (i = j = 0; i < n / 2; i += 2, j++)
29289 op0 = gen_reg_rtx (second_imode);
29290 emit_insn (gen_interleave_second_low (op0, ops[i],
	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
	     vector.  */
29295 ops[j] = gen_reg_rtx (third_imode);
29296 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
29298 second_imode = V2DImode;
29299 gen_interleave_second_low = gen_vec_interleave_lowv2di;
29303 op0 = gen_reg_rtx (second_imode);
29304 emit_insn (gen_interleave_second_low (op0, ops[0],
      /* Cast the SECOND_IMODE vector back to a vector of the original
	 mode.  */
29309 emit_insn (gen_rtx_SET (VOIDmode, target,
29310 gen_lowpart (mode, op0)));
29314 gcc_unreachable ();
29318 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
29319 all values variable, and none identical. */
29322 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
29323 rtx target, rtx vals)
29325 rtx ops[32], op0, op1;
29326 enum machine_mode half_mode = VOIDmode;
29333 if (!mmx_ok && !TARGET_SSE)
29345 n = GET_MODE_NUNITS (mode);
29346 for (i = 0; i < n; i++)
29347 ops[i] = XVECEXP (vals, 0, i);
29348 ix86_expand_vector_init_concat (mode, target, ops, n);
29352 half_mode = V16QImode;
29356 half_mode = V8HImode;
29360 n = GET_MODE_NUNITS (mode);
29361 for (i = 0; i < n; i++)
29362 ops[i] = XVECEXP (vals, 0, i);
29363 op0 = gen_reg_rtx (half_mode);
29364 op1 = gen_reg_rtx (half_mode);
29365 ix86_expand_vector_init_interleave (half_mode, op0, ops,
29367 ix86_expand_vector_init_interleave (half_mode, op1,
29368 &ops [n >> 1], n >> 2);
29369 emit_insn (gen_rtx_SET (VOIDmode, target,
29370 gen_rtx_VEC_CONCAT (mode, op0, op1)));
29374 if (!TARGET_SSE4_1)
29382 /* Don't use ix86_expand_vector_init_interleave if we can't
29383 move from GPR to SSE register directly. */
29384 if (!TARGET_INTER_UNIT_MOVES)
29387 n = GET_MODE_NUNITS (mode);
29388 for (i = 0; i < n; i++)
29389 ops[i] = XVECEXP (vals, 0, i);
29390 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
29398 gcc_unreachable ();
29402 int i, j, n_elts, n_words, n_elt_per_word;
29403 enum machine_mode inner_mode;
29404 rtx words[4], shift;
29406 inner_mode = GET_MODE_INNER (mode);
29407 n_elts = GET_MODE_NUNITS (mode);
29408 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
29409 n_elt_per_word = n_elts / n_words;
29410 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
29412 for (i = 0; i < n_words; ++i)
29414 rtx word = NULL_RTX;
29416 for (j = 0; j < n_elt_per_word; ++j)
29418 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
29419 elt = convert_modes (word_mode, inner_mode, elt, true);
29425 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
29426 word, 1, OPTAB_LIB_WIDEN);
29427 word = expand_simple_binop (word_mode, IOR, word, elt,
29428 word, 1, OPTAB_LIB_WIDEN);
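	    /* Example: with two HImode elements per SImode word, the
	       loop above walks the elements from high index to low and
	       produces word = (elt1 << 16) | elt0, i.e. the
	       little-endian layout.  */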
29436 emit_move_insn (target, gen_lowpart (mode, words[0]));
29437 else if (n_words == 2)
29439 rtx tmp = gen_reg_rtx (mode);
29440 emit_clobber (tmp);
29441 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
29442 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
29443 emit_move_insn (target, tmp);
29445 else if (n_words == 4)
29447 rtx tmp = gen_reg_rtx (V4SImode);
29448 gcc_assert (word_mode == SImode);
29449 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
29450 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
29451 emit_move_insn (target, gen_lowpart (mode, tmp));
29454 gcc_unreachable ();
29458 /* Initialize vector TARGET via VALS. Suppress the use of MMX
29459 instructions unless MMX_OK is true. */
29462 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
29464 enum machine_mode mode = GET_MODE (target);
29465 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29466 int n_elts = GET_MODE_NUNITS (mode);
29467 int n_var = 0, one_var = -1;
29468 bool all_same = true, all_const_zero = true;
29472 for (i = 0; i < n_elts; ++i)
29474 x = XVECEXP (vals, 0, i);
29475 if (!(CONST_INT_P (x)
29476 || GET_CODE (x) == CONST_DOUBLE
29477 || GET_CODE (x) == CONST_FIXED))
29478 n_var++, one_var = i;
29479 else if (x != CONST0_RTX (inner_mode))
29480 all_const_zero = false;
29481 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
29485 /* Constants are best loaded from the constant pool. */
29488 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
29492 /* If all values are identical, broadcast the value. */
29494 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
29495 XVECEXP (vals, 0, 0)))
29498 /* Values where only one field is non-constant are best loaded from
29499 the pool and overwritten via move later. */
29503 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
29504 XVECEXP (vals, 0, one_var),
29508 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
29512 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
29516 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
29518 enum machine_mode mode = GET_MODE (target);
29519 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29520 enum machine_mode half_mode;
29521 bool use_vec_merge = false;
29523 static rtx (*gen_extract[6][2]) (rtx, rtx)
29525 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
29526 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
29527 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
29528 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
29529 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
29530 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
29532 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
29534 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
29535 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
29536 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
29537 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
29538 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
29539 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
29549 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
29550 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
29552 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
29554 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
29555 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29561 use_vec_merge = TARGET_SSE4_1;
29569 /* For the two element vectors, we implement a VEC_CONCAT with
29570 the extraction of the other element. */
29572 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
29573 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
29576 op0 = val, op1 = tmp;
29578 op0 = tmp, op1 = val;
29580 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
29581 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29586 use_vec_merge = TARGET_SSE4_1;
29593 use_vec_merge = true;
29597 /* tmp = target = A B C D */
29598 tmp = copy_to_reg (target);
29599 /* target = A A B B */
29600 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
29601 /* target = X A B B */
29602 ix86_expand_vector_set (false, target, val, 0);
29603 /* target = A X C D */
29604 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29605 const1_rtx, const0_rtx,
29606 GEN_INT (2+4), GEN_INT (3+4)));
29610 /* tmp = target = A B C D */
29611 tmp = copy_to_reg (target);
29612 /* tmp = X B C D */
29613 ix86_expand_vector_set (false, tmp, val, 0);
29614 /* target = A B X D */
29615 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29616 const0_rtx, const1_rtx,
29617 GEN_INT (0+4), GEN_INT (3+4)));
29621 /* tmp = target = A B C D */
29622 tmp = copy_to_reg (target);
29623 /* tmp = X B C D */
29624 ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
29626 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
29627 const0_rtx, const1_rtx,
29628 GEN_INT (2+4), GEN_INT (0+4)));
29632 gcc_unreachable ();
29637 use_vec_merge = TARGET_SSE4_1;
29641 /* Element 0 handled by vec_merge below. */
29644 use_vec_merge = true;
29650 /* With SSE2, use integer shuffles to swap element 0 and ELT,
29651 store into element 0, then shuffle them back. */
29655 order[0] = GEN_INT (elt);
29656 order[1] = const1_rtx;
29657 order[2] = const2_rtx;
29658 order[3] = GEN_INT (3);
29659 order[elt] = const0_rtx;
29661 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29662 order[1], order[2], order[3]));
29664 ix86_expand_vector_set (false, target, val, 0);
29666 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
29667 order[1], order[2], order[3]));
29671 /* For SSE1, we have to reuse the V4SF code. */
29672 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
29673 gen_lowpart (SFmode, val), elt);
29678 use_vec_merge = TARGET_SSE2;
29681 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29685 use_vec_merge = TARGET_SSE4_1;
29692 half_mode = V16QImode;
29698 half_mode = V8HImode;
29704 half_mode = V4SImode;
29710 half_mode = V2DImode;
29716 half_mode = V4SFmode;
29722 half_mode = V2DFmode;
29728 /* Compute offset. */
29732 gcc_assert (i <= 1);
29734 /* Extract the half. */
29735 tmp = gen_reg_rtx (half_mode);
29736 emit_insn (gen_extract[j][i] (tmp, target));
29738 /* Put val in tmp at elt. */
29739 ix86_expand_vector_set (false, tmp, val, elt);
29742 emit_insn (gen_insert[j][i] (target, target, tmp));
29751 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29752 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29753 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29757 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29759 emit_move_insn (mem, target);
29761 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29762 emit_move_insn (tmp, val);
29764 emit_move_insn (target, mem);
29769 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29771 enum machine_mode mode = GET_MODE (vec);
29772 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29773 bool use_vec_extr = false;
29786 use_vec_extr = true;
29790 use_vec_extr = TARGET_SSE4_1;
29802 tmp = gen_reg_rtx (mode);
29803 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29804 GEN_INT (elt), GEN_INT (elt),
29805 GEN_INT (elt+4), GEN_INT (elt+4)));
29809 tmp = gen_reg_rtx (mode);
29810 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29814 gcc_unreachable ();
29817 use_vec_extr = true;
29822 use_vec_extr = TARGET_SSE4_1;
29836 tmp = gen_reg_rtx (mode);
29837 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29838 GEN_INT (elt), GEN_INT (elt),
29839 GEN_INT (elt), GEN_INT (elt)));
29843 tmp = gen_reg_rtx (mode);
29844 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29848 gcc_unreachable ();
29851 use_vec_extr = true;
29856 /* For SSE1, we have to reuse the V4SF code. */
29857 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29858 gen_lowpart (V4SFmode, vec), elt);
29864 use_vec_extr = TARGET_SSE2;
29867 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29871 use_vec_extr = TARGET_SSE4_1;
29875 /* ??? Could extract the appropriate HImode element and shift. */
29882 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29883 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29885 /* Let the rtl optimizers know about the zero extension performed. */
29886 if (inner_mode == QImode || inner_mode == HImode)
29888 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29889 target = gen_lowpart (SImode, target);
29892 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29896 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29898 emit_move_insn (mem, vec);
29900 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29901 emit_move_insn (target, tmp);
29905 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29906 pattern to reduce; DEST is the destination; IN is the input vector. */
29909 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29911 rtx tmp1, tmp2, tmp3;
29913 tmp1 = gen_reg_rtx (V4SFmode);
29914 tmp2 = gen_reg_rtx (V4SFmode);
29915 tmp3 = gen_reg_rtx (V4SFmode);
29917 emit_insn (gen_sse_movhlps (tmp1, in, in));
29918 emit_insn (fn (tmp2, tmp1, in));
29920 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29921 const1_rtx, const1_rtx,
29922 GEN_INT (1+4), GEN_INT (1+4)));
29923 emit_insn (fn (dest, tmp2, tmp3));
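  /* Data flow, writing the input as [a b c d] and FN as f:
       tmp1 = movhlps (in, in)      -> [c d c d]
       tmp2 = f (tmp1, in)          -> [f(c,a) f(d,b) . .]
       tmp3 = splat of tmp2 lane 1  -> [f(d,b) f(d,b) ...]
       dest = f (tmp2, tmp3)        -> lane 0 is f(f(c,a), f(d,b)),
     the reduction of all four input lanes.  */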
29926 /* Target hook for scalar_mode_supported_p. */
29928 ix86_scalar_mode_supported_p (enum machine_mode mode)
29930 if (DECIMAL_FLOAT_MODE_P (mode))
29931 return default_decimal_float_supported_p ();
29932 else if (mode == TFmode)
29935 return default_scalar_mode_supported_p (mode);
29938 /* Implements target hook vector_mode_supported_p. */
29940 ix86_vector_mode_supported_p (enum machine_mode mode)
29942 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29944 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29946 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29948 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29950 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29955 /* Target hook for c_mode_for_suffix. */
29956 static enum machine_mode
29957 ix86_c_mode_for_suffix (char suffix)
29967 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29969 We do this in the new i386 backend to maintain source compatibility
29970 with the old cc0-based compiler. */
29973 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29974 tree inputs ATTRIBUTE_UNUSED,
29977 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29979 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
/* Implements the target vector targetm.asm.encode_section_info.  This
   is not used by NetWare.  */
29987 static void ATTRIBUTE_UNUSED
29988 ix86_encode_section_info (tree decl, rtx rtl, int first)
29990 default_encode_section_info (decl, rtl, first);
29992 if (TREE_CODE (decl) == VAR_DECL
29993 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29994 && ix86_in_large_data_p (decl))
29995 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29998 /* Worker function for REVERSE_CONDITION. */
30001 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
30003 return (mode != CCFPmode && mode != CCFPUmode
30004 ? reverse_condition (code)
30005 : reverse_condition_maybe_unordered (code));
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  */
30012 output_387_reg_move (rtx insn, rtx *operands)
30014 if (REG_P (operands[0]))
30016 if (REG_P (operands[1])
30017 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30019 if (REGNO (operands[0]) == FIRST_STACK_REG)
30020 return output_387_ffreep (operands, 0);
30021 return "fstp\t%y0";
30023 if (STACK_TOP_P (operands[0]))
30024 return "fld%Z1\t%y1";
30027 else if (MEM_P (operands[0]))
30029 gcc_assert (REG_P (operands[1]));
30030 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
30031 return "fstp%Z0\t%y0";
30034 /* There is no non-popping store to memory for XFmode.
30035 So if we need one, follow the store with a load. */
30036 if (GET_MODE (operands[0]) == XFmode)
30037 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
30039 return "fst%Z0\t%y0";
30046 /* Output code to perform a conditional jump to LABEL, if C2 flag in
30047 FP status register is set. */
30050 ix86_emit_fp_unordered_jump (rtx label)
30052 rtx reg = gen_reg_rtx (HImode);
30055 emit_insn (gen_x86_fnstsw_1 (reg));
30057 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
30059 emit_insn (gen_x86_sahf_1 (reg));
30061 temp = gen_rtx_REG (CCmode, FLAGS_REG);
30062 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
30066 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
30068 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
30069 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
30072 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
30073 gen_rtx_LABEL_REF (VOIDmode, label),
30075 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
30077 emit_jump_insn (temp);
30078 predict_jump (REG_BR_PROB_BASE * 10 / 100);
30081 /* Output code to perform a log1p XFmode calculation. */
30083 void ix86_emit_i387_log1p (rtx op0, rtx op1)
30085 rtx label1 = gen_label_rtx ();
30086 rtx label2 = gen_label_rtx ();
30088 rtx tmp = gen_reg_rtx (XFmode);
30089 rtx tmp2 = gen_reg_rtx (XFmode);
30092 emit_insn (gen_absxf2 (tmp, op1));
30093 test = gen_rtx_GE (VOIDmode, tmp,
30094 CONST_DOUBLE_FROM_REAL_VALUE (
30095 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
30097 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
30099 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30100 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
30101 emit_jump (label2);
30103 emit_label (label1);
30104 emit_move_insn (tmp, CONST1_RTX (XFmode));
30105 emit_insn (gen_addxf3 (tmp, op1, tmp));
30106 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
30107 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
30109 emit_label (label2);
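/* A minimal C model of the i387 sequence above; a sketch assuming
   <math.h> and a LOG2_1P callback standing in for fyl2xp1 (which
   computes y * log2 (x + 1)), for illustration only, not part of GCC.
   The threshold constant is 1 - sqrt(2)/2: below it fyl2xp1 keeps full
   precision for small X, above it forming 1 + x explicitly and using
   fyl2x is safe.  */
static long double
i387_log1p_model (long double x, long double (*log2_1p) (long double))
{
  const long double ln2 = 0.6931471805599453094172321214581766L;
  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    return ln2 * log2_1p (x);          /* fldln2; fyl2xp1 */
  return ln2 * log2l (1.0L + x);       /* fld1; fadd; fldln2; fyl2x */
}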
30112 /* Output code to perform a Newton-Raphson approximation of a single precision
30113 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
30115 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
30117 rtx x0, x1, e0, e1, two;
30119 x0 = gen_reg_rtx (mode);
30120 e0 = gen_reg_rtx (mode);
30121 e1 = gen_reg_rtx (mode);
30122 x1 = gen_reg_rtx (mode);
30124 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
30126 if (VECTOR_MODE_P (mode))
30127 two = ix86_build_const_vector (mode, true, two);
30129 two = force_reg (mode, two);
30131 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
30133 /* x0 = rcp(b) estimate */
30134 emit_insn (gen_rtx_SET (VOIDmode, x0,
30135 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
30138 emit_insn (gen_rtx_SET (VOIDmode, e0,
30139 gen_rtx_MULT (mode, x0, a)));
30141 emit_insn (gen_rtx_SET (VOIDmode, e1,
30142 gen_rtx_MULT (mode, x0, b)));
30144 emit_insn (gen_rtx_SET (VOIDmode, x1,
30145 gen_rtx_MINUS (mode, two, e1)));
30146 /* res = e0 * x1 */
30147 emit_insn (gen_rtx_SET (VOIDmode, res,
30148 gen_rtx_MULT (mode, e0, x1)));
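/* A minimal scalar model of the refinement emitted above; a sketch
   for illustration only, not part of GCC.  RCP_ESTIMATE stands in for
   the rcpss instruction, which is accurate to roughly 12 bits; the
   single Newton-Raphson step doubles that, giving nearly single
   precision.  */
static float
swdiv_model (float a, float b, float (*rcp_estimate) (float))
{
  float x0 = rcp_estimate (b);   /* x0 = rcp(b) estimate */
  float e0 = x0 * a;             /* e0 = a * x0 */
  float e1 = x0 * b;             /* e1 = b * x0 */
  float x1 = 2.0f - e1;          /* x1 = 2.0 - b * x0 */
  return e0 * x1;                /* a/b ~= a * x0 * (2.0 - b * x0) */
}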
30151 /* Output code to perform a Newton-Raphson approximation of a
30152 single precision floating point [reciprocal] square root. */
30154 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
30157 rtx x0, e0, e1, e2, e3, mthree, mhalf;
30160 x0 = gen_reg_rtx (mode);
30161 e0 = gen_reg_rtx (mode);
30162 e1 = gen_reg_rtx (mode);
30163 e2 = gen_reg_rtx (mode);
30164 e3 = gen_reg_rtx (mode);
30166 real_from_integer (&r, VOIDmode, -3, -1, 0);
30167 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30169 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
30170 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
30172 if (VECTOR_MODE_P (mode))
30174 mthree = ix86_build_const_vector (mode, true, mthree);
30175 mhalf = ix86_build_const_vector (mode, true, mhalf);
30178 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
30179 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
30181 /* x0 = rsqrt(a) estimate */
30182 emit_insn (gen_rtx_SET (VOIDmode, x0,
30183 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
30186 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
30191 zero = gen_reg_rtx (mode);
30192 mask = gen_reg_rtx (mode);
30194 zero = force_reg (mode, CONST0_RTX(mode));
30195 emit_insn (gen_rtx_SET (VOIDmode, mask,
30196 gen_rtx_NE (mode, zero, a)));
30198 emit_insn (gen_rtx_SET (VOIDmode, x0,
30199 gen_rtx_AND (mode, x0, mask)));
30203 emit_insn (gen_rtx_SET (VOIDmode, e0,
30204 gen_rtx_MULT (mode, x0, a)));
30206 emit_insn (gen_rtx_SET (VOIDmode, e1,
30207 gen_rtx_MULT (mode, e0, x0)));
30210 mthree = force_reg (mode, mthree);
30211 emit_insn (gen_rtx_SET (VOIDmode, e2,
30212 gen_rtx_PLUS (mode, e1, mthree)));
30214 mhalf = force_reg (mode, mhalf);
30216 /* e3 = -.5 * x0 */
30217 emit_insn (gen_rtx_SET (VOIDmode, e3,
30218 gen_rtx_MULT (mode, x0, mhalf)));
30220 /* e3 = -.5 * e0 */
30221 emit_insn (gen_rtx_SET (VOIDmode, e3,
30222 gen_rtx_MULT (mode, e0, mhalf)));
30223 /* ret = e2 * e3 */
30224 emit_insn (gen_rtx_SET (VOIDmode, res,
30225 gen_rtx_MULT (mode, e2, e3)));
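/* A minimal scalar model of the sequence above; a sketch for
   illustration only, not part of GCC.  RSQRT_ESTIMATE stands in for
   rsqrtss.  When RECIP is false, the extra factor of A folded into e0
   turns the reciprocal square root into the square root itself.  The
   a == 0.0 masking done above to avoid NaN for sqrt (0.0) is omitted
   here.  */
static float
swsqrt_model (float a, int recip, float (*rsqrt_estimate) (float))
{
  float x0 = rsqrt_estimate (a);         /* x0 = rsqrt(a) estimate */
  float e0 = x0 * a;                     /* e0 = a * x0 */
  float e1 = e0 * x0;                    /* e1 = a * x0 * x0 */
  float e2 = e1 - 3.0f;                  /* e2 = a * x0 * x0 - 3.0 */
  float e3 = (recip ? x0 : e0) * -0.5f;  /* e3 = -.5 * x0 (or -.5 * e0) */
  return e2 * e3;
}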
30228 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
30230 static void ATTRIBUTE_UNUSED
30231 i386_solaris_elf_named_section (const char *name, unsigned int flags,
30234 /* With Binutils 2.15, the "@unwind" marker must be specified on
30235 every occurrence of the ".eh_frame" section, not just the first one. */
30238 && strcmp (name, ".eh_frame") == 0)
30240 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
30241 flags & SECTION_WRITE ? "aw" : "a");
30244 default_elf_asm_named_section (name, flags, decl);
30247 /* Return the mangling of TYPE if it is an extended fundamental type. */
30249 static const char *
30250 ix86_mangle_type (const_tree type)
30252 type = TYPE_MAIN_VARIANT (type);
30254 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
30255 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
30258 switch (TYPE_MODE (type))
30261 /* __float128 is "g". */
30264 /* "long double" or __float80 is "e". */
30271 /* For 32-bit code we can save PIC register setup by using
30272 __stack_chk_fail_local hidden function instead of calling
30273 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
30274 register, so it is better to call __stack_chk_fail directly. */
30277 ix86_stack_protect_fail (void)
30279 return TARGET_64BIT
30280 ? default_external_stack_protect_fail ()
30281 : default_hidden_stack_protect_fail ();
30284 /* Select a format to encode pointers in exception handling data. CODE
30285 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
30286 true if the symbol may be affected by dynamic relocations.
30288 ??? All x86 object file formats are capable of representing this.
30289 After all, the relocation needed is the same as for the call insn.
30290 Whether or not a particular assembler allows us to enter such, I
30291 guess we'll have to see. */
30293 asm_preferred_eh_data_format (int code, int global)
30297 int type = DW_EH_PE_sdata8;
30299 || ix86_cmodel == CM_SMALL_PIC
30300 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
30301 type = DW_EH_PE_sdata4;
30302 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
30304 if (ix86_cmodel == CM_SMALL
30305 || (ix86_cmodel == CM_MEDIUM && code))
30306 return DW_EH_PE_udata4;
30307 return DW_EH_PE_absptr;
30310 /* Expand copysign from SIGN to the positive value ABS_VALUE
30311 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit of SIGN. */
30314 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
30316 enum machine_mode mode = GET_MODE (sign);
30317 rtx sgn = gen_reg_rtx (mode);
30318 if (mask == NULL_RTX)
30320 enum machine_mode vmode;
30322 if (mode == SFmode)
30324 else if (mode == DFmode)
30329 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
30330 if (!VECTOR_MODE_P (mode))
30332 /* We need to generate a scalar mode mask in this case. */
30333 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30334 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30335 mask = gen_reg_rtx (mode);
30336 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30340 mask = gen_rtx_NOT (mode, mask);
30341 emit_insn (gen_rtx_SET (VOIDmode, sgn,
30342 gen_rtx_AND (mode, mask, sign)));
30343 emit_insn (gen_rtx_SET (VOIDmode, result,
30344 gen_rtx_IOR (mode, abs_value, sgn)));
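/* What the mask/AND/IOR sequence above computes, written as scalar
   bit twiddling on the SFmode representation; a sketch assuming C99
   <stdint.h> and <string.h>, for illustration only, not part of
   GCC.  */
static float
copysign_to_positive_model (float abs_value, float sign)
{
  uint32_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x80000000u;   /* IOR the selected sign bit into ABS_VALUE */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}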
30347 /* Expand fabs (OP0) and return a new rtx that holds the result. The
30348 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
30351 ix86_expand_sse_fabs (rtx op0, rtx *smask)
30353 enum machine_mode vmode, mode = GET_MODE (op0);
30356 xa = gen_reg_rtx (mode);
30357 if (mode == SFmode)
30359 else if (mode == DFmode)
30363 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
30364 if (!VECTOR_MODE_P (mode))
30366 /* We need to generate a scalar mode mask in this case. */
30367 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
30368 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
30369 mask = gen_reg_rtx (mode);
30370 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
30372 emit_insn (gen_rtx_SET (VOIDmode, xa,
30373 gen_rtx_AND (mode, op0, mask)));
30381 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
30382 swapping the operands if SWAP_OPERANDS is true. The expanded
30383 code is a forward jump to a newly created label in case the
30384 comparison is true. The generated label rtx is returned. */
30386 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
30387 bool swap_operands)
30398 label = gen_label_rtx ();
30399 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
30400 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30401 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
30402 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
30403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
30404 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
30405 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
30406 JUMP_LABEL (tmp) = label;
30411 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
30412 using comparison code CODE. Operands are swapped for the comparison if
30413 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
30415 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
30416 bool swap_operands)
30418 enum machine_mode mode = GET_MODE (op0);
30419 rtx mask = gen_reg_rtx (mode);
30428 if (mode == DFmode)
30429 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
30430 gen_rtx_fmt_ee (code, mode, op0, op1)));
30432 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
30433 gen_rtx_fmt_ee (code, mode, op0, op1)));
30438 /* Generate and return a rtx of mode MODE for 2**n where n is the number
30439 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
30441 ix86_gen_TWO52 (enum machine_mode mode)
30443 REAL_VALUE_TYPE TWO52r;
30446 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
30447 TWO52 = const_double_from_real_value (TWO52r, mode);
30448 TWO52 = force_reg (mode, TWO52);
30453 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
30456 ix86_expand_lround (rtx op0, rtx op1)
30458 /* C code for the stuff we're doing below:
30459 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
30462 enum machine_mode mode = GET_MODE (op1);
30463 const struct real_format *fmt;
30464 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30467 /* load nextafter (0.5, 0.0) */
30468 fmt = REAL_MODE_FORMAT (mode);
30469 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30470 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30472 /* adj = copysign (0.5, op1) */
30473 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
30474 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
30476 /* adj = op1 + adj */
30477 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
30479 /* op0 = (imode)adj */
30480 expand_fix (op0, adj, 0);
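/* A minimal C model of the expansion above (round half away from
   zero); a sketch assuming <math.h>, not part of GCC.  Adding
   nextafter (0.5, 0.0) instead of 0.5 keeps inputs just below 0.5
   from rounding up to 1 during the addition.  */
static long
lround_model (double op1)
{
  double adj = copysign (nextafter (0.5, 0.0), op1);
  return (long) (op1 + adj);   /* the truncating fix completes the round */
}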
30483 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0. */
30486 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
30488 /* C code for the stuff we're doing below (for do_floor):
30490 xi -= (double)xi > op1 ? 1 : 0;
30493 enum machine_mode fmode = GET_MODE (op1);
30494 enum machine_mode imode = GET_MODE (op0);
30495 rtx ireg, freg, label, tmp;
30497 /* reg = (long)op1 */
30498 ireg = gen_reg_rtx (imode);
30499 expand_fix (ireg, op1, 0);
30501 /* freg = (double)reg */
30502 freg = gen_reg_rtx (fmode);
30503 expand_float (freg, ireg, 0);
30505 /* ireg = (freg > op1) ? ireg - 1 : ireg */
30506 label = ix86_expand_sse_compare_and_jump (UNLE,
30507 freg, op1, !do_floor);
30508 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
30509 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
30510 emit_move_insn (ireg, tmp);
30512 emit_label (label);
30513 LABEL_NUSES (label) = 1;
30515 emit_move_insn (op0, ireg);
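/* A minimal C model of the do_floor case above; a sketch for
   illustration only, not part of GCC.  The conversion truncates
   toward zero, so whenever the result converted back compares greater
   than the input, the truncation went up and one must be subtracted
   (the lceil case mirrors this with an add).  */
static long
lfloor_model (double op1)
{
  long xi = (long) op1;        /* cvttsd2si: truncate toward zero */
  xi -= (double) xi > op1;     /* compensate when truncation went up */
  return xi;
}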
30518 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
30519 result in OPERAND0. */
30521 ix86_expand_rint (rtx operand0, rtx operand1)
30523 /* C code for the stuff we're doing below:
30524 xa = fabs (operand1);
30525 if (!isless (xa, 2**52))
30527 xa = xa + 2**52 - 2**52;
30528 return copysign (xa, operand1);
30530 enum machine_mode mode = GET_MODE (operand0);
30531 rtx res, xa, label, TWO52, mask;
30533 res = gen_reg_rtx (mode);
30534 emit_move_insn (res, operand1);
30536 /* xa = abs (operand1) */
30537 xa = ix86_expand_sse_fabs (res, &mask);
30539 /* if (!isless (xa, TWO52)) goto label; */
30540 TWO52 = ix86_gen_TWO52 (mode);
30541 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30543 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30544 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30546 ix86_sse_copysign_to_positive (res, xa, res, mask);
30548 emit_label (label);
30549 LABEL_NUSES (label) = 1;
30551 emit_move_insn (operand0, res);
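/* A minimal C model of the expansion above; a sketch assuming
   <math.h>, not part of GCC, and ignoring that a compiler may fold
   the adds at excess precision.  Adding and subtracting 2**52 forces
   rounding to an integer in the current rounding mode; magnitudes of
   2**52 or more (and NaNs, via the !isless test) are already integral
   and pass through unchanged.  */
static double
rint_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x);
  if (!(xa < two52))           /* !isless (xa, TWO52) */
    return x;
  xa = (xa + two52) - two52;
  return copysign (xa, x);     /* restore the sign; keeps -0.0 intact */
}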
30554 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0, without relying on DImode truncation via cvttsd2siq, which is only available on 64-bit targets. */
30557 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
30559 /* C code for the stuff we expand below.
30560 double xa = fabs (x), x2;
30561 if (!isless (xa, TWO52))
30563 xa = xa + TWO52 - TWO52;
30564 x2 = copysign (xa, x);
30573 enum machine_mode mode = GET_MODE (operand0);
30574 rtx xa, TWO52, tmp, label, one, res, mask;
30576 TWO52 = ix86_gen_TWO52 (mode);
30578 /* Temporary for holding the result, initialized to the input
30579 operand to ease control flow. */
30580 res = gen_reg_rtx (mode);
30581 emit_move_insn (res, operand1);
30583 /* xa = abs (operand1) */
30584 xa = ix86_expand_sse_fabs (res, &mask);
30586 /* if (!isless (xa, TWO52)) goto label; */
30587 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30589 /* xa = xa + TWO52 - TWO52; */
30590 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30591 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
30593 /* xa = copysign (xa, operand1) */
30594 ix86_sse_copysign_to_positive (xa, xa, res, mask);
30596 /* generate 1.0 or -1.0 */
30597 one = force_reg (mode,
30598 const_double_from_real_value (do_floor
30599 ? dconst1 : dconstm1, mode));
30601 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30602 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30603 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30604 gen_rtx_AND (mode, one, tmp)));
30605 /* We always need to subtract here to preserve signed zero. */
30606 tmp = expand_simple_binop (mode, MINUS,
30607 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30608 emit_move_insn (res, tmp);
30610 emit_label (label);
30611 LABEL_NUSES (label) = 1;
30613 emit_move_insn (operand0, res);
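/* A minimal C model of the do_floor case above, avoiding any 64-bit
   integer conversion; a sketch assuming <math.h>, not part of GCC.
   The 2**52 trick rounds to nearest, and the masked subtraction of
   1.0 repairs the cases where that rounded up; always subtracting
   keeps -0.0 intact.  */
static double
floor_df32_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x), x2;
  if (!(xa < two52))
    return x;
  xa = (xa + two52) - two52;
  x2 = copysign (xa, x);
  x2 -= (x2 > x);              /* xa > operand1 ? 1 : 0 */
  return x2;
}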
30616 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0. */
30619 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
30621 /* C code for the stuff we expand below.
30622 double xa = fabs (x), x2;
30623 if (!isless (xa, TWO52))
30625 x2 = (double)(long)x;
30632 if (HONOR_SIGNED_ZEROS (mode))
30633 return copysign (x2, x);
30636 enum machine_mode mode = GET_MODE (operand0);
30637 rtx xa, xi, TWO52, tmp, label, one, res, mask;
30639 TWO52 = ix86_gen_TWO52 (mode);
30641 /* Temporary for holding the result, initialized to the input
30642 operand to ease control flow. */
30643 res = gen_reg_rtx (mode);
30644 emit_move_insn (res, operand1);
30646 /* xa = abs (operand1) */
30647 xa = ix86_expand_sse_fabs (res, &mask);
30649 /* if (!isless (xa, TWO52)) goto label; */
30650 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30652 /* xa = (double)(long)x */
30653 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30654 expand_fix (xi, res, 0);
30655 expand_float (xa, xi, 0);
30658 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30660 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
30661 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
30662 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30663 gen_rtx_AND (mode, one, tmp)));
30664 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
30665 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30666 emit_move_insn (res, tmp);
30668 if (HONOR_SIGNED_ZEROS (mode))
30669 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30671 emit_label (label);
30672 LABEL_NUSES (label) = 1;
30674 emit_move_insn (operand0, res);
30677 /* Expand SSE sequence for computing round from OPERAND1 storing
30678 into OPERAND0. This sequence works without relying on DImode truncation
30679 via cvttsd2siq, which is only available on 64-bit targets. */
30681 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
30683 /* C code for the stuff we expand below.
30684 double xa = fabs (x), xa2, x2;
30685 if (!isless (xa, TWO52))
30687 Using the absolute value and copying back sign makes
30688 -0.0 -> -0.0 correct.
30689 xa2 = xa + TWO52 - TWO52;
30694 else if (dxa > 0.5)
30696 x2 = copysign (xa2, x);
30699 enum machine_mode mode = GET_MODE (operand0);
30700 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
30702 TWO52 = ix86_gen_TWO52 (mode);
30704 /* Temporary for holding the result, initialized to the input
30705 operand to ease control flow. */
30706 res = gen_reg_rtx (mode);
30707 emit_move_insn (res, operand1);
30709 /* xa = abs (operand1) */
30710 xa = ix86_expand_sse_fabs (res, &mask);
30712 /* if (!isless (xa, TWO52)) goto label; */
30713 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30715 /* xa2 = xa + TWO52 - TWO52; */
30716 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30717 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
30719 /* dxa = xa2 - xa; */
30720 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
30722 /* generate 0.5, 1.0 and -0.5 */
30723 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
30724 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
30725 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
30729 tmp = gen_reg_rtx (mode);
30730 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
30731 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
30732 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30733 gen_rtx_AND (mode, one, tmp)));
30734 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30735 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
30736 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
30737 emit_insn (gen_rtx_SET (VOIDmode, tmp,
30738 gen_rtx_AND (mode, one, tmp)));
30739 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
30741 /* res = copysign (xa2, operand1) */
30742 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
30744 emit_label (label);
30745 LABEL_NUSES (label) = 1;
30747 emit_move_insn (operand0, res);
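/* A minimal C model of the expansion above; a sketch assuming
   <math.h>, not part of GCC.  xa2 is xa rounded to nearest even by
   the 2**52 trick, dxa records which way it moved, and the two masked
   corrections turn round-to-even into round-half-away-from-zero.  */
static double
round_df32_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x), xa2, dxa;
  if (!(xa < two52))
    return x;
  xa2 = (xa + two52) - two52;
  dxa = xa2 - xa;
  xa2 -= (dxa > 0.5);          /* moved up by more than half: undo */
  xa2 += (dxa <= -0.5);        /* moved down by half or more: bump up */
  return copysign (xa2, x);
}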
30750 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0. */
30753 ix86_expand_trunc (rtx operand0, rtx operand1)
30755 /* C code for SSE variant we expand below.
30756 double xa = fabs (x), x2;
30757 if (!isless (xa, TWO52))
30759 x2 = (double)(long)x;
30760 if (HONOR_SIGNED_ZEROS (mode))
30761 return copysign (x2, x);
30764 enum machine_mode mode = GET_MODE (operand0);
30765 rtx xa, xi, TWO52, label, res, mask;
30767 TWO52 = ix86_gen_TWO52 (mode);
30769 /* Temporary for holding the result, initialized to the input
30770 operand to ease control flow. */
30771 res = gen_reg_rtx (mode);
30772 emit_move_insn (res, operand1);
30774 /* xa = abs (operand1) */
30775 xa = ix86_expand_sse_fabs (res, &mask);
30777 /* if (!isless (xa, TWO52)) goto label; */
30778 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30780 /* x = (double)(long)x */
30781 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30782 expand_fix (xi, res, 0);
30783 expand_float (res, xi, 0);
30785 if (HONOR_SIGNED_ZEROS (mode))
30786 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
30788 emit_label (label);
30789 LABEL_NUSES (label) = 1;
30791 emit_move_insn (operand0, res);
30794 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0, without relying on DImode truncation via cvttsd2siq, which is only available on 64-bit targets. */
30797 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
30799 enum machine_mode mode = GET_MODE (operand0);
30800 rtx xa, mask, TWO52, label, one, res, smask, tmp;
30802 /* C code for SSE variant we expand below.
30803 double xa = fabs (x), x2;
30804 if (!isless (xa, TWO52))
30806 xa2 = xa + TWO52 - TWO52;
30810 x2 = copysign (xa2, x);
30814 TWO52 = ix86_gen_TWO52 (mode);
30816 /* Temporary for holding the result, initialized to the input
30817 operand to ease control flow. */
30818 res = gen_reg_rtx (mode);
30819 emit_move_insn (res, operand1);
30821 /* xa = abs (operand1) */
30822 xa = ix86_expand_sse_fabs (res, &smask);
30824 /* if (!isless (xa, TWO52)) goto label; */
30825 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30827 /* res = xa + TWO52 - TWO52; */
30828 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30829 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30830 emit_move_insn (res, tmp);
30833 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30835 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30836 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30837 emit_insn (gen_rtx_SET (VOIDmode, mask,
30838 gen_rtx_AND (mode, mask, one)));
30839 tmp = expand_simple_binop (mode, MINUS,
30840 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30841 emit_move_insn (res, tmp);
30843 /* res = copysign (res, operand1) */
30844 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30846 emit_label (label);
30847 LABEL_NUSES (label) = 1;
30849 emit_move_insn (operand0, res);
30852 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0. */
30855 ix86_expand_round (rtx operand0, rtx operand1)
30857 /* C code for the stuff we're doing below:
30858 double xa = fabs (x);
30859 if (!isless (xa, TWO52))
30861 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30862 return copysign (xa, x);
30864 enum machine_mode mode = GET_MODE (operand0);
30865 rtx res, TWO52, xa, label, xi, half, mask;
30866 const struct real_format *fmt;
30867 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30869 /* Temporary for holding the result, initialized to the input
30870 operand to ease control flow. */
30871 res = gen_reg_rtx (mode);
30872 emit_move_insn (res, operand1);
30874 TWO52 = ix86_gen_TWO52 (mode);
30875 xa = ix86_expand_sse_fabs (res, &mask);
30876 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30878 /* load nextafter (0.5, 0.0) */
30879 fmt = REAL_MODE_FORMAT (mode);
30880 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30881 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30883 /* xa = xa + 0.5 */
30884 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30885 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30887 /* xa = (double)(int64_t)xa */
30888 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30889 expand_fix (xi, xa, 0);
30890 expand_float (xa, xi, 0);
30892 /* res = copysign (xa, operand1) */
30893 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30895 emit_label (label);
30896 LABEL_NUSES (label) = 1;
30898 emit_move_insn (operand0, res);
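/* A minimal C model of the expansion above; a sketch assuming
   <math.h> and <stdint.h>, not part of GCC.  The absolute value gets
   just under 0.5 added, is truncated through an integer, and the sign
   is copied back; nextafter (0.5, 0.0) keeps inputs just below 0.5
   from rounding up during the addition.  */
static double
round_model (double x)
{
  const double two52 = 0x1p52;
  double xa = fabs (x);
  if (!(xa < two52))
    return x;
  xa = (double) (int64_t) (xa + nextafter (0.5, 0.0));
  return copysign (xa, x);
}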
30902 /* Table of valid machine attributes. */
30903 static const struct attribute_spec ix86_attribute_table[] =
30905 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30906 /* Stdcall attribute says callee is responsible for popping arguments
30907 if they are not variable. */
30908 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30909 /* Fastcall attribute says callee is responsible for popping arguments
30910 if they are not variable. */
30911 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30912 /* Thiscall attribute says callee is responsible for popping arguments
30913 if they are not variable. */
30914 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30915 /* Cdecl attribute says the callee is a normal C declaration */
30916 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30917 /* Regparm attribute specifies how many integer arguments are to be
30918 passed in registers. */
30919 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30920 /* Sseregparm attribute says we are using x86_64 calling conventions
30921 for FP arguments. */
30922 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30923 /* force_align_arg_pointer says this function realigns the stack at entry. */
30924 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30925 false, true, true, ix86_handle_cconv_attribute },
30926 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30927 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30928 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30929 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30931 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30932 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30933 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30934 SUBTARGET_ATTRIBUTE_TABLE,
30936 /* ms_abi and sysv_abi calling convention function attributes. */
30937 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30938 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30939 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30941 { NULL, 0, 0, false, false, false, NULL }
30944 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30946 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30947 tree vectype ATTRIBUTE_UNUSED,
30948 int misalign ATTRIBUTE_UNUSED)
30950 switch (type_of_cost)
30953 return ix86_cost->scalar_stmt_cost;
30956 return ix86_cost->scalar_load_cost;
30959 return ix86_cost->scalar_store_cost;
30962 return ix86_cost->vec_stmt_cost;
30965 return ix86_cost->vec_align_load_cost;
30968 return ix86_cost->vec_store_cost;
30970 case vec_to_scalar:
30971 return ix86_cost->vec_to_scalar_cost;
30973 case scalar_to_vec:
30974 return ix86_cost->scalar_to_vec_cost;
30976 case unaligned_load:
30977 case unaligned_store:
30978 return ix86_cost->vec_unalign_load_cost;
30980 case cond_branch_taken:
30981 return ix86_cost->cond_taken_branch_cost;
30983 case cond_branch_not_taken:
30984 return ix86_cost->cond_not_taken_branch_cost;
30990 gcc_unreachable ();
30995 /* Implement targetm.vectorize.builtin_vec_perm. */
30998 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
31000 tree itype = TREE_TYPE (vec_type);
31001 bool u = TYPE_UNSIGNED (itype);
31002 enum machine_mode vmode = TYPE_MODE (vec_type);
31003 enum ix86_builtins fcode;
31004 bool ok = TARGET_SSE2;
31010 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
31013 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
31015 itype = ix86_get_builtin_type (IX86_BT_DI);
31020 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
31024 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
31026 itype = ix86_get_builtin_type (IX86_BT_SI);
31030 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
31033 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
31036 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
31039 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
31049 *mask_type = itype;
31050 return ix86_builtins[(int) fcode];
31053 /* Return a vector mode with twice as many elements as VMODE. */
31054 /* ??? Consider moving this to a table generated by genmodes.c. */
31056 static enum machine_mode
31057 doublesize_vector_mode (enum machine_mode vmode)
31061 case V2SFmode: return V4SFmode;
31062 case V1DImode: return V2DImode;
31063 case V2SImode: return V4SImode;
31064 case V4HImode: return V8HImode;
31065 case V8QImode: return V16QImode;
31067 case V2DFmode: return V4DFmode;
31068 case V4SFmode: return V8SFmode;
31069 case V2DImode: return V4DImode;
31070 case V4SImode: return V8SImode;
31071 case V8HImode: return V16HImode;
31072 case V16QImode: return V32QImode;
31074 case V4DFmode: return V8DFmode;
31075 case V8SFmode: return V16SFmode;
31076 case V4DImode: return V8DImode;
31077 case V8SImode: return V16SImode;
31078 case V16HImode: return V32HImode;
31079 case V32QImode: return V64QImode;
31082 gcc_unreachable ();
31086 /* Construct (set target (vec_select op0 (parallel perm))) and
31087 return true if that's a valid instruction in the active ISA. */
31090 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
31092 rtx rperm[MAX_VECT_LEN], x;
31095 for (i = 0; i < nelt; ++i)
31096 rperm[i] = GEN_INT (perm[i]);
31098 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
31099 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
31100 x = gen_rtx_SET (VOIDmode, target, x);
31103 if (recog_memoized (x) < 0)
31111 /* Similar, but generate a vec_concat from op0 and op1 as well. */
31114 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
31115 const unsigned char *perm, unsigned nelt)
31117 enum machine_mode v2mode;
31120 v2mode = doublesize_vector_mode (GET_MODE (op0));
31121 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
31122 return expand_vselect (target, x, perm, nelt);
31125 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31126 in terms of blendp[sd] / pblendw / pblendvb. */
31129 expand_vec_perm_blend (struct expand_vec_perm_d *d)
31131 enum machine_mode vmode = d->vmode;
31132 unsigned i, mask, nelt = d->nelt;
31133 rtx target, op0, op1, x;
31135 if (!TARGET_SSE4_1 || d->op0 == d->op1)
31137 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
31140 /* This is a blend, not a permute. Elements must stay in their
31141 respective lanes. */
31142 for (i = 0; i < nelt; ++i)
31144 unsigned e = d->perm[i];
31145 if (!(e == i || e == i + nelt))
31152 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
31153 decision should be extracted elsewhere, so that we only try that
31154 sequence once all budget==3 options have been tried. */
31156 /* For bytes, see if bytes move in pairs so we can use pblendw with
31157 an immediate argument, rather than pblendvb with a vector argument. */
31158 if (vmode == V16QImode)
31160 bool pblendw_ok = true;
31161 for (i = 0; i < 16 && pblendw_ok; i += 2)
31162 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
31166 rtx rperm[16], vperm;
31168 for (i = 0; i < nelt; ++i)
31169 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
31171 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31172 vperm = force_reg (V16QImode, vperm);
31174 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
31179 target = d->target;
31191 for (i = 0; i < nelt; ++i)
31192 mask |= (d->perm[i] >= nelt) << i;
31196 for (i = 0; i < 2; ++i)
31197 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
31201 for (i = 0; i < 4; ++i)
31202 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
31206 for (i = 0; i < 8; ++i)
31207 mask |= (d->perm[i * 2] >= 16) << i;
31211 target = gen_lowpart (vmode, target);
31212 op0 = gen_lowpart (vmode, op0);
31213 op1 = gen_lowpart (vmode, op1);
31217 gcc_unreachable ();
31220 /* This matches five different patterns with the different modes. */
31221 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
31222 x = gen_rtx_SET (VOIDmode, target, x);
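/* The element selection performed by the vec_merge above, as a scalar
   loop; a sketch for illustration only, not part of GCC.  A set bit I
   in the immediate mask picks element I from OP1, a clear bit picks it
   from OP0, matching the semantics of blendps/blendpd/pblendw with an
   immediate operand.  */
static void
blend_model (float *target, const float *op0, const float *op1,
             unsigned mask, unsigned nelt)
{
  unsigned i;
  for (i = 0; i < nelt; ++i)
    target[i] = (mask >> i) & 1 ? op1[i] : op0[i];
}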
31228 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31229 in terms of the variable form of vpermilps.
31231 Note that we will have already failed the immediate input vpermilps,
31232 which requires that the high and low part shuffle be identical; the
31233 variable form doesn't require that. */
31236 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
31238 rtx rperm[8], vperm;
31241 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
31244 /* We can only permute within the 128-bit lane. */
31245 for (i = 0; i < 8; ++i)
31247 unsigned e = d->perm[i];
31248 if (i < 4 ? e >= 4 : e < 4)
31255 for (i = 0; i < 8; ++i)
31257 unsigned e = d->perm[i];
31259 /* Within each 128-bit lane, the elements of op0 are numbered
31260 from 0 and the elements of op1 are numbered from 4. */
31266 rperm[i] = GEN_INT (e);
31269 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
31270 vperm = force_reg (V8SImode, vperm);
31271 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
31276 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31277 in terms of pshufb or vpperm. */
31280 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
31282 unsigned i, nelt, eltsz;
31283 rtx rperm[16], vperm, target, op0, op1;
31285 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
31287 if (GET_MODE_SIZE (d->vmode) != 16)
31294 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31296 for (i = 0; i < nelt; ++i)
31298 unsigned j, e = d->perm[i];
31299 for (j = 0; j < eltsz; ++j)
31300 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
31303 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
31304 vperm = force_reg (V16QImode, vperm);
31306 target = gen_lowpart (V16QImode, d->target);
31307 op0 = gen_lowpart (V16QImode, d->op0);
31308 if (d->op0 == d->op1)
31309 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
31312 op1 = gen_lowpart (V16QImode, d->op1);
31313 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
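/* The byte-selector semantics relied on above, as a scalar loop; a
   sketch for illustration only, not part of GCC.  Each control byte
   indexes a source byte and a set top bit zeroes the lane; the mask
   built above expands an element index E of width ELTSZ into the
   bytes E*ELTSZ .. E*ELTSZ+ELTSZ-1.  vpperm extends this by indexing
   across two source registers.  */
static void
pshufb_model (unsigned char dst[16], const unsigned char src[16],
              const unsigned char sel[16])
{
  unsigned i;
  for (i = 0; i < 16; ++i)
    dst[i] = (sel[i] & 0x80) ? 0 : src[sel[i] & 15];
}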
31319 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
31320 in a single instruction. */
31323 expand_vec_perm_1 (struct expand_vec_perm_d *d)
31325 unsigned i, nelt = d->nelt;
31326 unsigned char perm2[MAX_VECT_LEN];
31328 /* Check plain VEC_SELECT first, because AVX has instructions that could
31329 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
31330 input where SEL+CONCAT may not. */
31331 if (d->op0 == d->op1)
31333 int mask = nelt - 1;
31335 for (i = 0; i < nelt; i++)
31336 perm2[i] = d->perm[i] & mask;
31338 if (expand_vselect (d->target, d->op0, perm2, nelt))
31341 /* There are plenty of patterns in sse.md that are written for
31342 SEL+CONCAT and are not replicated for a single op. Perhaps
31343 that should be changed, to avoid the nastiness here. */
31345 /* Recognize interleave style patterns, which means incrementing
31346 every other permutation operand. */
31347 for (i = 0; i < nelt; i += 2)
31349 perm2[i] = d->perm[i] & mask;
31350 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
31352 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31355 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
31358 for (i = 0; i < nelt; i += 4)
31360 perm2[i + 0] = d->perm[i + 0] & mask;
31361 perm2[i + 1] = d->perm[i + 1] & mask;
31362 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
31363 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
31366 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
31371 /* Finally, try the fully general two operand permute. */
31372 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
31375 /* Recognize interleave style patterns with reversed operands. */
31376 if (d->op0 != d->op1)
31378 for (i = 0; i < nelt; ++i)
31380 unsigned e = d->perm[i];
31388 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
31392 /* Try the SSE4.1 blend variable merge instructions. */
31393 if (expand_vec_perm_blend (d))
31396 /* Try one of the AVX vpermil variable permutations. */
31397 if (expand_vec_perm_vpermil (d))
31400 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
31401 if (expand_vec_perm_pshufb (d))
31407 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
31408 in terms of a pair of pshuflw + pshufhw instructions. */
31411 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
31413 unsigned char perm2[MAX_VECT_LEN];
31417 if (d->vmode != V8HImode || d->op0 != d->op1)
31420 /* The two permutations only operate in 64-bit lanes. */
31421 for (i = 0; i < 4; ++i)
31422 if (d->perm[i] >= 4)
31424 for (i = 4; i < 8; ++i)
31425 if (d->perm[i] < 4)
31431 /* Emit the pshuflw. */
31432 memcpy (perm2, d->perm, 4);
31433 for (i = 4; i < 8; ++i)
31435 ok = expand_vselect (d->target, d->op0, perm2, 8);
31438 /* Emit the pshufhw. */
31439 memcpy (perm2 + 4, d->perm + 4, 4);
31440 for (i = 0; i < 4; ++i)
31442 ok = expand_vselect (d->target, d->target, perm2, 8);
31448 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31449 the permutation using the SSSE3 palignr instruction. This succeeds
31450 when all of the elements in PERM fit within one vector and we merely
31451 need to shift them down so that a single vector permutation has a
31452 chance to succeed. */
31455 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
31457 unsigned i, nelt = d->nelt;
31462 /* Even with AVX, palignr only operates on 128-bit vectors. */
31463 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31466 min = nelt, max = 0;
31467 for (i = 0; i < nelt; ++i)
31469 unsigned e = d->perm[i];
31475 if (min == 0 || max - min >= nelt)
31478 /* Given that we have SSSE3, we know we'll be able to implement the
31479 single operand permutation after the palignr with pshufb. */
31483 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
31484 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
31485 gen_lowpart (TImode, d->op1),
31486 gen_lowpart (TImode, d->op0), shift));
31488 d->op0 = d->op1 = d->target;
31491 for (i = 0; i < nelt; ++i)
31493 unsigned e = d->perm[i] - min;
31499 /* Test for the degenerate case where the alignment by itself
31500 produces the desired permutation. */
31504 ok = expand_vec_perm_1 (d);
31510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
31511 a two vector permutation into a single vector permutation by using
31512 an interleave operation to merge the vectors. */
31515 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
31517 struct expand_vec_perm_d dremap, dfinal;
31518 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
31519 unsigned contents, h1, h2, h3, h4;
31520 unsigned char remap[2 * MAX_VECT_LEN];
31524 if (d->op0 == d->op1)
31527 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
31528 lanes. We can use similar techniques with the vperm2f128 instruction,
31529 but it requires slightly different logic. */
31530 if (GET_MODE_SIZE (d->vmode) != 16)
31533 /* Examine from whence the elements come. */
31535 for (i = 0; i < nelt; ++i)
31536 contents |= 1u << d->perm[i];
31538 /* Split the two input vectors into 4 halves. */
31539 h1 = (1u << nelt2) - 1;
31544 memset (remap, 0xff, sizeof (remap));
31547 /* If all of the elements come from the low halves, use interleave low; similarly
31548 for interleave high. If the elements come from mismatched halves, we
31549 can use shufps for V4SF/V4SI or do a DImode shuffle. */
31550 if ((contents & (h1 | h3)) == contents)
31552 for (i = 0; i < nelt2; ++i)
31555 remap[i + nelt] = i * 2 + 1;
31556 dremap.perm[i * 2] = i;
31557 dremap.perm[i * 2 + 1] = i + nelt;
31560 else if ((contents & (h2 | h4)) == contents)
31562 for (i = 0; i < nelt2; ++i)
31564 remap[i + nelt2] = i * 2;
31565 remap[i + nelt + nelt2] = i * 2 + 1;
31566 dremap.perm[i * 2] = i + nelt2;
31567 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
31570 else if ((contents & (h1 | h4)) == contents)
31572 for (i = 0; i < nelt2; ++i)
31575 remap[i + nelt + nelt2] = i + nelt2;
31576 dremap.perm[i] = i;
31577 dremap.perm[i + nelt2] = i + nelt + nelt2;
31581 dremap.vmode = V2DImode;
31583 dremap.perm[0] = 0;
31584 dremap.perm[1] = 3;
31587 else if ((contents & (h2 | h3)) == contents)
31589 for (i = 0; i < nelt2; ++i)
31591 remap[i + nelt2] = i;
31592 remap[i + nelt] = i + nelt2;
31593 dremap.perm[i] = i + nelt2;
31594 dremap.perm[i + nelt2] = i + nelt;
31598 dremap.vmode = V2DImode;
31600 dremap.perm[0] = 1;
31601 dremap.perm[1] = 2;
31607 /* Use the remapping array set up above to move the elements from their
31608 swizzled locations into their final destinations. */
31610 for (i = 0; i < nelt; ++i)
31612 unsigned e = remap[d->perm[i]];
31613 gcc_assert (e < nelt);
31614 dfinal.perm[i] = e;
31616 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
31617 dfinal.op1 = dfinal.op0;
31618 dremap.target = dfinal.op0;
31620 /* Test if the final remap can be done with a single insn. For V4SFmode or
31621 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
31623 ok = expand_vec_perm_1 (&dfinal);
31624 seq = get_insns ();
31630 if (dremap.vmode != dfinal.vmode)
31632 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
31633 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
31634 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
31637 ok = expand_vec_perm_1 (&dremap);
31644 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
31645 permutation with two pshufb insns and an ior. We should have already
31646 failed all two instruction sequences. */
31649 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
31651 rtx rperm[2][16], vperm, l, h, op, m128;
31652 unsigned int i, nelt, eltsz;
31654 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
31656 gcc_assert (d->op0 != d->op1);
31659 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
31661 /* Generate two permutation masks. If the required element is within
31662 the given vector it is shuffled into the proper lane. If the required
31663 element is in the other vector, force a zero into the lane by setting
31664 bit 7 in the permutation mask. */
31665 m128 = GEN_INT (-128);
31666 for (i = 0; i < nelt; ++i)
31668 unsigned j, e = d->perm[i];
31669 unsigned which = (e >= nelt);
31673 for (j = 0; j < eltsz; ++j)
31675 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
31676 rperm[1-which][i*eltsz + j] = m128;
31680 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
31681 vperm = force_reg (V16QImode, vperm);
31683 l = gen_reg_rtx (V16QImode);
31684 op = gen_lowpart (V16QImode, d->op0);
31685 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
31687 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
31688 vperm = force_reg (V16QImode, vperm);
31690 h = gen_reg_rtx (V16QImode);
31691 op = gen_lowpart (V16QImode, d->op1);
31692 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
31694 op = gen_lowpart (V16QImode, d->target);
31695 emit_insn (gen_iorv16qi3 (op, l, h));
31700 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
31701 and extract-odd permutations. */
31704 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
31711 t1 = gen_reg_rtx (V4DFmode);
31712 t2 = gen_reg_rtx (V4DFmode);
31714 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
31715 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
31716 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
31718 /* Now an unpck[lh]pd will produce the result required. */
31720 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
31722 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
31728 int mask = odd ? 0xdd : 0x88;
31730 t1 = gen_reg_rtx (V8SFmode);
31731 t2 = gen_reg_rtx (V8SFmode);
31732 t3 = gen_reg_rtx (V8SFmode);
31734 /* Shuffle within the 128-bit lanes to produce:
31735 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
31736 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
31739 /* Shuffle the lanes around to produce:
31740 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
31741 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
31744 /* Shuffle within the 128-bit lanes to produce:
31745 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
31746 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
31748 /* Shuffle within the 128-bit lanes to produce:
31749 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
31750 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
31752 /* Shuffle the lanes around to produce:
31753 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
31754 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
31763 /* These are always directly implementable by expand_vec_perm_1. */
31764 gcc_unreachable ();
31768 return expand_vec_perm_pshufb2 (d);
31771 /* We need 2*log2(N)-1 operations to achieve odd/even
31772 with interleave. */
31773 t1 = gen_reg_rtx (V8HImode);
31774 t2 = gen_reg_rtx (V8HImode);
31775 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
31776 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
31777 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
31778 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
31780 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
31782 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
31789 return expand_vec_perm_pshufb2 (d);
31792 t1 = gen_reg_rtx (V16QImode);
31793 t2 = gen_reg_rtx (V16QImode);
31794 t3 = gen_reg_rtx (V16QImode);
31795 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
31796 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
31797 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
31798 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
31799 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
31800 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
31802 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
31804 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
31810 gcc_unreachable ();
31816 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31817 extract-even and extract-odd permutations. */
31820 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31822 unsigned i, odd, nelt = d->nelt;
31825 if (odd != 0 && odd != 1)
31828 for (i = 1; i < nelt; ++i)
31829 if (d->perm[i] != 2 * i + odd)
31832 return expand_vec_perm_even_odd_1 (d, odd);
31835 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31836 permutations. We assume that expand_vec_perm_1 has already failed. */
31839 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31841 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31842 enum machine_mode vmode = d->vmode;
31843 unsigned char perm2[4];
31851 /* These are special-cased in sse.md so that we can optionally
31852 use the vbroadcast instruction. They expand to two insns
31853 if the input happens to be in a register. */
31854 gcc_unreachable ();
31860 /* These are always implementable using standard shuffle patterns. */
31861 gcc_unreachable ();
31865 /* These can be implemented via interleave. We save one insn by
31866 stopping once we have promoted to V4SImode and then use pshufd. */
31869 optab otab = vec_interleave_low_optab;
31873 otab = vec_interleave_high_optab;
31878 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31879 vmode = get_mode_wider_vector (vmode);
31880 op0 = gen_lowpart (vmode, op0);
31882 while (vmode != V4SImode);
31884 memset (perm2, elt, 4);
31885 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31890 gcc_unreachable ();
31894 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31895 broadcast permutations. */
31898 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31900 unsigned i, elt, nelt = d->nelt;
31902 if (d->op0 != d->op1)
31906 for (i = 1; i < nelt; ++i)
31907 if (d->perm[i] != elt)
31910 return expand_vec_perm_broadcast_1 (d);
31913 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31914 With all of the interface bits taken care of, perform the expansion
31915 in D and return true on success. */
31918 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31920 /* Try a single instruction expansion. */
31921 if (expand_vec_perm_1 (d))
31924 /* Try sequences of two instructions. */
31926 if (expand_vec_perm_pshuflw_pshufhw (d))
31929 if (expand_vec_perm_palignr (d))
31932 if (expand_vec_perm_interleave2 (d))
31935 if (expand_vec_perm_broadcast (d))
31938 /* Try sequences of three instructions. */
31940 if (expand_vec_perm_pshufb2 (d))
31943 /* ??? Look for narrow permutations whose element orderings would
31944 allow the promotion to a wider mode. */
31946 /* ??? Look for sequences of interleave or a wider permute that place
31947 the data into the correct lanes for a half-vector shuffle like
31948 pshuf[lh]w or vpermilps. */
31950 /* ??? Look for sequences of interleave that produce the desired results.
31951 The combinatorics of punpck[lh] get pretty ugly... */
31953 if (expand_vec_perm_even_odd (d))
31959 /* Extract the values from the vector CST into the permutation array in D.
31960 Return 0 on error, 1 if all values from the permutation come from the
31961 first vector, 2 if all values from the second vector, and 3 otherwise. */
31964 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31966 tree list = TREE_VECTOR_CST_ELTS (cst);
31967 unsigned i, nelt = d->nelt;
31970 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31972 unsigned HOST_WIDE_INT e;
31974 if (!host_integerp (TREE_VALUE (list), 1))
31976 e = tree_low_cst (TREE_VALUE (list), 1);
31980 ret |= (e < nelt ? 1 : 2);
31983 gcc_assert (list == NULL);
31985 /* For all elements from the second vector, fold them into the first. */
31987 for (i = 0; i < nelt; ++i)
31988 d->perm[i] -= nelt;
31994 ix86_expand_vec_perm_builtin (tree exp)
31996 struct expand_vec_perm_d d;
31997 tree arg0, arg1, arg2;
31999 arg0 = CALL_EXPR_ARG (exp, 0);
32000 arg1 = CALL_EXPR_ARG (exp, 1);
32001 arg2 = CALL_EXPR_ARG (exp, 2);
32003 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
32004 d.nelt = GET_MODE_NUNITS (d.vmode);
32005 d.testing_p = false;
32006 gcc_assert (VECTOR_MODE_P (d.vmode));
32008 if (TREE_CODE (arg2) != VECTOR_CST)
32010 error_at (EXPR_LOCATION (exp),
32011 "vector permutation requires vector constant");
32015 switch (extract_vec_perm_cst (&d, arg2))
32021 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
32025 if (!operand_equal_p (arg0, arg1, 0))
32027 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32028 d.op0 = force_reg (d.vmode, d.op0);
32029 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32030 d.op1 = force_reg (d.vmode, d.op1);
32034 /* The elements of PERM do not suggest that only the first operand
32035 is used, but both operands are identical. Allow easier matching
32036 of the permutation by folding the permutation into the single input operand. */
32039 unsigned i, nelt = d.nelt;
32040 for (i = 0; i < nelt; ++i)
32041 if (d.perm[i] >= nelt)
32047 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
32048 d.op0 = force_reg (d.vmode, d.op0);
32053 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
32054 d.op0 = force_reg (d.vmode, d.op0);
32059 d.target = gen_reg_rtx (d.vmode);
32060 if (ix86_expand_vec_perm_builtin_1 (&d))
32063 /* For compiler generated permutations, we should never get here, because
32064 the compiler should also be checking the ok hook. But since this is a
32065 builtin the user has access to, don't abort. */
32069 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
32072 sorry ("vector permutation (%d %d %d %d)",
32073 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
32076 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
32077 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32078 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
32081 sorry ("vector permutation "
32082 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
32083 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
32084 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
32085 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
32086 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
32089 gcc_unreachable ();
32092 return CONST0_RTX (d.vmode);
32095 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
32098 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
32100 struct expand_vec_perm_d d;
32104 d.vmode = TYPE_MODE (vec_type);
32105 d.nelt = GET_MODE_NUNITS (d.vmode);
32106 d.testing_p = true;
32108 /* Given sufficient ISA support we can just return true here
32109 for selected vector modes. */
32110 if (GET_MODE_SIZE (d.vmode) == 16)
32112 /* All implementable with a single vpperm insn. */
32115 /* All implementable with 2 pshufb + 1 ior. */
32118 /* All implementable with shufpd or unpck[lh]pd. */
32123 vec_mask = extract_vec_perm_cst (&d, mask);
32125 /* This hook cannot be called in response to something that the
32126 user does (unlike the builtin expander), so we shouldn't ever see
32127 an error generated from the extract. */
32128 gcc_assert (vec_mask > 0 && vec_mask <= 3);
32129 one_vec = (vec_mask != 3);
32131 /* Implementable with shufps or pshufd. */
32132 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
32135 /* Otherwise we have to go through the motions and see if we can
32136 figure out how to generate the requested permutation. */
32137 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
32138 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
32140 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
32143 ret = ix86_expand_vec_perm_builtin_1 (&d);
32150 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
32152 struct expand_vec_perm_d d;
32158 d.vmode = GET_MODE (targ);
32159 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
32160 d.testing_p = false;
32162 for (i = 0; i < nelt; ++i)
32163 d.perm[i] = i * 2 + odd;
32165 /* We'll either be able to implement the permutation directly... */
32166 if (expand_vec_perm_1 (&d))
32169 /* ... or we use the special-case patterns. */
32170 expand_vec_perm_even_odd_1 (&d, odd);
32173 /* This function returns the calling-ABI-specific va_list type node,
32174 i.e. the FNDECL-specific va_list type. */
32177 ix86_fn_abi_va_list (tree fndecl)
32180 return va_list_type_node;
32181 gcc_assert (fndecl != NULL_TREE);
32183 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
32184 return ms_va_list_type_node;
32186 return sysv_va_list_type_node;
32189 /* Returns the canonical va_list type specified by TYPE. If there
32190 is no valid TYPE provided, it returns NULL_TREE. */
32193 ix86_canonical_va_list_type (tree type)
32197 /* Resolve references and pointers to va_list type. */
32198 if (TREE_CODE (type) == MEM_REF)
32199 type = TREE_TYPE (type);
32200 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
32201 type = TREE_TYPE (type);
32202 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
32203 type = TREE_TYPE (type);
32207 wtype = va_list_type_node;
32208 gcc_assert (wtype != NULL_TREE);
32210 if (TREE_CODE (wtype) == ARRAY_TYPE)
32212 /* If va_list is an array type, the argument may have decayed
32213 to a pointer type, e.g. by being passed to another function.
32214 In that case, unwrap both types so that we can compare the
32215 underlying records. */
32216 if (TREE_CODE (htype) == ARRAY_TYPE
32217 || POINTER_TYPE_P (htype))
32219 wtype = TREE_TYPE (wtype);
32220 htype = TREE_TYPE (htype);
32223 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32224 return va_list_type_node;
32225 wtype = sysv_va_list_type_node;
32226 gcc_assert (wtype != NULL_TREE);
32228 if (TREE_CODE (wtype) == ARRAY_TYPE)
32230 /* If va_list is an array type, the argument may have decayed
32231 to a pointer type, e.g. by being passed to another function.
32232 In that case, unwrap both types so that we can compare the
32233 underlying records. */
32234 if (TREE_CODE (htype) == ARRAY_TYPE
32235 || POINTER_TYPE_P (htype))
32237 wtype = TREE_TYPE (wtype);
32238 htype = TREE_TYPE (htype);
32241 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32242 return sysv_va_list_type_node;
32243 wtype = ms_va_list_type_node;
32244 gcc_assert (wtype != NULL_TREE);
32246 if (TREE_CODE (wtype) == ARRAY_TYPE)
32248 /* If va_list is an array type, the argument may have decayed
32249 to a pointer type, e.g. by being passed to another function.
32250 In that case, unwrap both types so that we can compare the
32251 underlying records. */
32252 if (TREE_CODE (htype) == ARRAY_TYPE
32253 || POINTER_TYPE_P (htype))
32255 wtype = TREE_TYPE (wtype);
32256 htype = TREE_TYPE (htype);
32259 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
32260 return ms_va_list_type_node;
32263 return std_canonical_va_list_type (type);
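/* Example of the decay handled above (annotation only): if va_list is
   an array type, e.g.

       typedef struct __va_list_tag va_list[1];

   then a parameter declared as "va_list ap" decays to
   "struct __va_list_tag *", so both WTYPE and HTYPE must be unwrapped
   before the underlying record types can be compared.  */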
32266 /* Iterate through the target-specific builtin types for va_list.
32267 IDX denotes the iterator, *PTREE is set to the result type of
32268 the va_list builtin, and *PNAME to its internal name.
32269 Returns zero if there is no element for this index, otherwise
32270 IDX should be increased upon the next call.
32271 Note, do not iterate a base builtin's name like __builtin_va_list.
32272 Used from c_common_nodes_and_builtins. */
32275 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
32285 *ptree = ms_va_list_type_node;
32286 *pname = "__builtin_ms_va_list";
32290 *ptree = sysv_va_list_type_node;
32291 *pname = "__builtin_sysv_va_list";
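/* Illustrative sketch, not part of the original source: one way a
   front end could walk this hook.  The real consumer is
   c_common_nodes_and_builtins; the function below is hypothetical
   and guarded out.  */
#if 0
static void
example_walk_va_list_builtins (void)
{
  const char *name;
  tree type;
  int idx;

  /* Successive indices yield __builtin_ms_va_list and
     __builtin_sysv_va_list; a zero return ends the iteration.  */
  for (idx = 0; ix86_enum_va_list (idx, &name, &type) != 0; ++idx)
    ;
}
#endif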
32299 #undef TARGET_SCHED_DISPATCH
32300 #define TARGET_SCHED_DISPATCH has_dispatch
32301 #undef TARGET_SCHED_DISPATCH_DO
32302 #define TARGET_SCHED_DISPATCH_DO do_dispatch
32304 /* The size of the dispatch window is the total number of bytes of
32305 object code allowed in a window. */
32306 #define DISPATCH_WINDOW_SIZE 16
32308 /* Number of dispatch windows considered for scheduling. */
32309 #define MAX_DISPATCH_WINDOWS 3
32311 /* Maximum number of instructions in a window. */
32314 /* Maximum number of immediate operands in a window. */
32317 /* Maximum number of immediate bits allowed in a window. */
32318 #define MAX_IMM_SIZE 128
32320 /* Maximum number of 32 bit immediates allowed in a window. */
32321 #define MAX_IMM_32 4
32323 /* Maximum number of 64 bit immediates allowed in a window. */
32324 #define MAX_IMM_64 2
32326 /* Maximum total of loads or prefetches allowed in a window. */
32329 /* Maximum total of stores allowed in a window. */
32330 #define MAX_STORE 1
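/* Annotation added for exposition: the 48-byte checks made further
   down correspond to MAX_DISPATCH_WINDOWS * DISPATCH_WINDOW_SIZE,
   i.e. three 16-byte windows' worth of object code, and the immediate
   checks charge each 64 bit immediate as two of the MAX_IMM_32 32 bit
   slots.  */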
32336 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
32337 enum dispatch_group {
32352 /* Number of allowable groups in a dispatch window. It is an array
32353 indexed by dispatch_group enum. 100 is used as a big number,
32354 because the number of these kinds of operations does not have any
32355 effect in the dispatch window, but we need them for other reasons in the table. */
32357 static unsigned int num_allowable_groups[disp_last] = {
32358 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
32361 char group_name[disp_last + 1][16] = {
32362 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
32363 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
32364 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
32367 /* Instruction path. */
32370 path_single, /* Single micro op. */
32371 path_double, /* Double micro op. */
32372 path_multi, /* Instructions with more than 2 micro ops. */
32376 /* sched_insn_info defines a window to the instructions scheduled in
32377 the basic block. It contains a pointer to the insn_info table and
32378 the instruction scheduled.
32380 Windows are allocated for each basic block and are linked with each other. */
32382 typedef struct sched_insn_info_s {
32384 enum dispatch_group group;
32385 enum insn_path path;
32390 /* Linked list of dispatch windows. This is a two-way list of
32391 dispatch windows of a basic block. It contains information about
32392 the number of uops in the window and the total number of
32393 instructions and of bytes in the object code for this dispatch window. */
32395 typedef struct dispatch_windows_s {
32396 int num_insn; /* Number of insns in the window. */
32397 int num_uops; /* Number of uops in the window. */
32398 int window_size; /* Number of bytes in the window. */
32399 int window_num; /* Window number, either 0 or 1. */
32400 int num_imm; /* Number of immediates in the window. */
32401 int num_imm_32; /* Number of 32 bit immediates in the window. */
32402 int num_imm_64; /* Number of 64 bit immediates in the window. */
32403 int imm_size; /* Total size in bytes of the immediates in the window. */
32404 int num_loads; /* Total memory loads in the window. */
32405 int num_stores; /* Total memory stores in the window. */
32406 int violation; /* Whether a violation exists in the window. */
32407 sched_insn_info *window; /* Pointer to the window. */
32408 struct dispatch_windows_s *next;
32409 struct dispatch_windows_s *prev;
32410 } dispatch_windows;
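/* Annotation added for exposition: in practice exactly two of these
   nodes exist, dispatch_window_list (window 0) and
   dispatch_window_list1 (window 1); allocate_next_window below links
   them through the next/prev pointers.  */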
32412 /* Immediate values used in an insn. */
32413 typedef struct imm_info_s
32420 static dispatch_windows *dispatch_window_list;
32421 static dispatch_windows *dispatch_window_list1;
32423 /* Get dispatch group of insn. */
32425 static enum dispatch_group
32426 get_mem_group (rtx insn)
32428 enum attr_memory memory;
32430 if (INSN_CODE (insn) < 0)
32431 return disp_no_group;
32432 memory = get_attr_memory (insn);
32433 if (memory == MEMORY_STORE)
32436 if (memory == MEMORY_LOAD)
32439 if (memory == MEMORY_BOTH)
32440 return disp_load_store;
32442 return disp_no_group;
32445 /* Return true if insn is a compare instruction. */
32450 enum attr_type type;
32452 type = get_attr_type (insn);
32453 return (type == TYPE_TEST
32454 || type == TYPE_ICMP
32455 || type == TYPE_FCMP
32456 || GET_CODE (PATTERN (insn)) == COMPARE);
32459 /* Return true if a dispatch violation was encountered. */
32462 dispatch_violation (void)
32464 if (dispatch_window_list->next)
32465 return dispatch_window_list->next->violation;
32466 return dispatch_window_list->violation;
32469 /* Return true if insn is a branch instruction. */
32472 is_branch (rtx insn)
32474 return (CALL_P (insn) || JUMP_P (insn));
32477 /* Return true if insn is a prefetch instruction. */
32480 is_prefetch (rtx insn)
32482 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
32485 /* This function initializes a dispatch window and the list container holding a
32486 pointer to the window. */
32489 init_window (int window_num)
32492 dispatch_windows *new_list;
32494 if (window_num == 0)
32495 new_list = dispatch_window_list;
32497 new_list = dispatch_window_list1;
32499 new_list->num_insn = 0;
32500 new_list->num_uops = 0;
32501 new_list->window_size = 0;
32502 new_list->next = NULL;
32503 new_list->prev = NULL;
32504 new_list->window_num = window_num;
32505 new_list->num_imm = 0;
32506 new_list->num_imm_32 = 0;
32507 new_list->num_imm_64 = 0;
32508 new_list->imm_size = 0;
32509 new_list->num_loads = 0;
32510 new_list->num_stores = 0;
32511 new_list->violation = false;
32513 for (i = 0; i < MAX_INSN; i++)
32515 new_list->window[i].insn = NULL;
32516 new_list->window[i].group = disp_no_group;
32517 new_list->window[i].path = no_path;
32518 new_list->window[i].byte_len = 0;
32519 new_list->window[i].imm_bytes = 0;
32524 /* This function allocates and initializes a dispatch window and the
32525 list container holding a pointer to the window. */
32527 static dispatch_windows *
32528 allocate_window (void)
32530 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
32531 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
32536 /* This routine initializes the dispatch scheduling information. It
32537 initiates building dispatch scheduler tables and constructs the
32538 first dispatch window. */
32541 init_dispatch_sched (void)
32543 /* Allocate a dispatch list and a window. */
32544 dispatch_window_list = allocate_window ();
32545 dispatch_window_list1 = allocate_window ();
32550 /* This function returns true if a branch is detected. End of a basic block
32551 does not have to be a branch, but here we assume only branches end a window. */
32555 is_end_basic_block (enum dispatch_group group)
32557 return group == disp_branch;
32560 /* This function is called when the end of window processing is reached. */
32563 process_end_window (void)
32565 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
32566 if (dispatch_window_list->next)
32568 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
32569 gcc_assert (dispatch_window_list->window_size
32570 + dispatch_window_list1->window_size <= 48);
32576 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
32577 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
32578 for 48 bytes of instructions. Note that these windows are not dispatch
32579 windows whose sizes equal DISPATCH_WINDOW_SIZE. */
32581 static dispatch_windows *
32582 allocate_next_window (int window_num)
32584 if (window_num == 0)
32586 if (dispatch_window_list->next)
32589 return dispatch_window_list;
32592 dispatch_window_list->next = dispatch_window_list1;
32593 dispatch_window_list1->prev = dispatch_window_list;
32595 return dispatch_window_list1;
32598 /* Increment the number of immediate operands of an instruction. */
32601 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
32606 switch (GET_CODE (*in_rtx))
32611 (imm_values->imm)++;
32612 if (x86_64_immediate_operand (*in_rtx, SImode))
32613 (imm_values->imm32)++;
32615 (imm_values->imm64)++;
32619 (imm_values->imm)++;
32620 (imm_values->imm64)++;
32624 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
32626 (imm_values->imm)++;
32627 (imm_values->imm32)++;
32638 /* Compute number of immediate operands of an instruction. */
32641 find_constant (rtx in_rtx, imm_info *imm_values)
32643 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
32644 (rtx_function) find_constant_1, (void *) imm_values);
32647 /* Return total size of immediate operands of an instruction along with number
32648 of corresponding immediate-operands. It initializes its parameters to zero
32649 before calling FIND_CONSTANT.
32650 INSN is the input instruction. IMM is the total of immediates.
32651 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64 bit immediates. */
32655 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
32657 imm_info imm_values = {0, 0, 0};
32659 find_constant (insn, &imm_values);
32660 *imm = imm_values.imm;
32661 *imm32 = imm_values.imm32;
32662 *imm64 = imm_values.imm64;
32663 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
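/* Worked example (annotation only): for "movabsq $0x1122334455667788,
   %rax" the constant does not satisfy x86_64_immediate_operand in
   SImode, so *IMM = 1, *IMM32 = 0, *IMM64 = 1 and the returned size is
   8 bytes; for "addl $4, %eax" *IMM = 1, *IMM32 = 1, *IMM64 = 0 and
   the size is 4 bytes.  */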
32666 /* This function indicates if an operand of an instruction is an immediate. */
32670 has_immediate (rtx insn)
32672 int num_imm_operand;
32673 int num_imm32_operand;
32674 int num_imm64_operand;
32677 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32678 &num_imm64_operand);
32682 /* Return single or double path for instructions. */
32684 static enum insn_path
32685 get_insn_path (rtx insn)
32687 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
32689 if ((int)path == 0)
32690 return path_single;
32692 if ((int)path == 1)
32693 return path_double;
32698 /* Return insn dispatch group. */
32700 static enum dispatch_group
32701 get_insn_group (rtx insn)
32703 enum dispatch_group group = get_mem_group (insn);
32707 if (is_branch (insn))
32708 return disp_branch;
32713 if (has_immediate (insn))
32716 if (is_prefetch (insn))
32717 return disp_prefetch;
32719 return disp_no_group;
32722 /* Count number of GROUP restricted instructions in a dispatch
32723 window WINDOW_LIST. */
32726 count_num_restricted (rtx insn, dispatch_windows *window_list)
32728 enum dispatch_group group = get_insn_group (insn);
32730 int num_imm_operand;
32731 int num_imm32_operand;
32732 int num_imm64_operand;
32734 if (group == disp_no_group)
32737 if (group == disp_imm)
32739 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32740 &num_imm64_operand);
32741 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
32742 || num_imm_operand + window_list->num_imm > MAX_IMM
32743 || (num_imm32_operand > 0
32744 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
32745 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
32746 || (num_imm64_operand > 0
32747 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
32748 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
32749 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
32750 && num_imm64_operand > 0
32751 && ((window_list->num_imm_64 > 0
32752 && window_list->num_insn >= 2)
32753 || window_list->num_insn >= 3)))
32759 if ((group == disp_load_store
32760 && (window_list->num_loads >= MAX_LOAD
32761 || window_list->num_stores >= MAX_STORE))
32762 || ((group == disp_load
32763 || group == disp_prefetch)
32764 && window_list->num_loads >= MAX_LOAD)
32765 || (group == disp_store
32766 && window_list->num_stores >= MAX_STORE))
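/* Worked example (annotation only): with MAX_IMM_32 == 4 and
   MAX_IMM_64 == 2, a window already holding one 64 bit immediate has
   consumed two of the four 32 bit slots, so an insn bringing three
   more 32 bit immediates trips the
   "num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32" check above.  */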
32772 /* This function returns true if insn satisfies dispatch rules on the
32773 last window scheduled. */
32776 fits_dispatch_window (rtx insn)
32778 dispatch_windows *window_list = dispatch_window_list;
32779 dispatch_windows *window_list_next = dispatch_window_list->next;
32780 unsigned int num_restrict;
32781 enum dispatch_group group = get_insn_group (insn);
32782 enum insn_path path = get_insn_path (insn);
32785 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
32786 instructions should be given the lowest priority in the
32787 scheduling process in the Haifa scheduler to make sure they will be
32788 scheduled in the same dispatch window as the reference to them. */
32789 if (group == disp_jcc || group == disp_cmp)
32792 /* Check nonrestricted. */
32793 if (group == disp_no_group || group == disp_branch)
32796 /* Get last dispatch window. */
32797 if (window_list_next)
32798 window_list = window_list_next;
32800 if (window_list->window_num == 1)
32802 sum = window_list->prev->window_size + window_list->window_size;
32805 || (min_insn_size (insn) + sum) >= 48)
32806 /* Window 1 is full. Go for next window. */
32810 num_restrict = count_num_restricted (insn, window_list);
32812 if (num_restrict > num_allowable_groups[group])
32815 /* See if it fits in the first window. */
32816 if (window_list->window_num == 0)
32818 /* The first window should have only single and double path uops. */
32820 if (path == path_double
32821 && (window_list->num_uops + 2) > MAX_INSN)
32823 else if (path != path_single)
32829 /* Add an instruction INSN with NUM_UOPS micro-operations to the
32830 dispatch window WINDOW_LIST. */
32833 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
32835 int byte_len = min_insn_size (insn);
32836 int num_insn = window_list->num_insn;
32838 sched_insn_info *window = window_list->window;
32839 enum dispatch_group group = get_insn_group (insn);
32840 enum insn_path path = get_insn_path (insn);
32841 int num_imm_operand;
32842 int num_imm32_operand;
32843 int num_imm64_operand;
32845 if (!window_list->violation && group != disp_cmp
32846 && !fits_dispatch_window (insn))
32847 window_list->violation = true;
32849 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
32850 &num_imm64_operand);
32852 /* Initialize window with new instruction. */
32853 window[num_insn].insn = insn;
32854 window[num_insn].byte_len = byte_len;
32855 window[num_insn].group = group;
32856 window[num_insn].path = path;
32857 window[num_insn].imm_bytes = imm_size;
32859 window_list->window_size += byte_len;
32860 window_list->num_insn = num_insn + 1;
32861 window_list->num_uops = window_list->num_uops + num_uops;
32862 window_list->imm_size += imm_size;
32863 window_list->num_imm += num_imm_operand;
32864 window_list->num_imm_32 += num_imm32_operand;
32865 window_list->num_imm_64 += num_imm64_operand;
32867 if (group == disp_store)
32868 window_list->num_stores += 1;
32869 else if (group == disp_load
32870 || group == disp_prefetch)
32871 window_list->num_loads += 1;
32872 else if (group == disp_load_store)
32874 window_list->num_stores += 1;
32875 window_list->num_loads += 1;
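/* Worked example (annotation only): adding "movl $1, (%eax)" charges
   the window with one insn, the insn's encoded length, one store
   (get_mem_group classifies it as disp_store) and one 32 bit
   immediate of 4 bytes, all through the field updates above.  */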
32879 /* Adds a scheduled instruction, INSN, to the current dispatch window.
32880 If the total bytes of instructions or the number of instructions in
32881 the window exceed the allowable limits, it allocates a new window. */
32884 add_to_dispatch_window (rtx insn)
32887 dispatch_windows *window_list;
32888 dispatch_windows *next_list;
32889 dispatch_windows *window0_list;
32890 enum insn_path path;
32891 enum dispatch_group insn_group;
32899 if (INSN_CODE (insn) < 0)
32902 byte_len = min_insn_size (insn);
32903 window_list = dispatch_window_list;
32904 next_list = window_list->next;
32905 path = get_insn_path (insn);
32906 insn_group = get_insn_group (insn);
32908 /* Get the last dispatch window. */
32910 window_list = dispatch_window_list->next;
32912 if (path == path_single)
32914 else if (path == path_double)
32917 insn_num_uops = (int) path;
32919 /* If current window is full, get a new window.
32920 Window number zero is full, if MAX_INSN uops are scheduled in it.
32921 Window number one is full, if window zero's bytes plus window
32922 one's bytes is 32, or if the bytes of the new instruction added
32923 to the total makes it greater than 48, or it has already MAX_INSN
32924 instructions in it. */
32925 num_insn = window_list->num_insn;
32926 num_uops = window_list->num_uops;
32927 window_num = window_list->window_num;
32928 insn_fits = fits_dispatch_window (insn);
32930 if (num_insn >= MAX_INSN
32931 || num_uops + insn_num_uops > MAX_INSN
32934 window_num = ~window_num & 1;
32935 window_list = allocate_next_window (window_num);
32938 if (window_num == 0)
32940 add_insn_window (insn, window_list, insn_num_uops);
32941 if (window_list->num_insn >= MAX_INSN
32942 && insn_group == disp_branch)
32944 process_end_window ();
32948 else if (window_num == 1)
32950 window0_list = window_list->prev;
32951 sum = window0_list->window_size + window_list->window_size;
32953 || (byte_len + sum) >= 48)
32955 process_end_window ();
32956 window_list = dispatch_window_list;
32959 add_insn_window (insn, window_list, insn_num_uops);
32962 gcc_unreachable ();
32964 if (is_end_basic_block (insn_group))
32966 /* End of basic block is reached; do end-basic-block processing. */
32967 process_end_window ();
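/* Annotation added for exposition: "window_num = ~window_num & 1"
   above toggles between windows 0 and 1, so at most two windows are
   live at a time; process_end_window closes out the current pair once
   a branch ends the basic block or the byte budget is exhausted.  */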
32972 /* Print the dispatch window, WINDOW_NUM, to FILE. */
32974 DEBUG_FUNCTION static void
32975 debug_dispatch_window_file (FILE *file, int window_num)
32977 dispatch_windows *list;
32980 if (window_num == 0)
32981 list = dispatch_window_list;
32983 list = dispatch_window_list1;
32985 fprintf (file, "Window #%d:\n", list->window_num);
32986 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
32987 list->num_insn, list->num_uops, list->window_size);
32988 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
32989 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
32991 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
32993 fprintf (file, " insn info:\n");
32995 for (i = 0; i < MAX_INSN; i++)
32997 if (!list->window[i].insn)
32999 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
33000 i, group_name[list->window[i].group],
33001 i, (void *)list->window[i].insn,
33002 i, list->window[i].path,
33003 i, list->window[i].byte_len,
33004 i, list->window[i].imm_bytes);
33008 /* Print to stdout a dispatch window. */
33010 DEBUG_FUNCTION void
33011 debug_dispatch_window (int window_num)
33013 debug_dispatch_window_file (stdout, window_num);
33016 /* Print INSN dispatch information to FILE. */
33018 DEBUG_FUNCTION static void
33019 debug_insn_dispatch_info_file (FILE *file, rtx insn)
33022 enum insn_path path;
33023 enum dispatch_group group;
33025 int num_imm_operand;
33026 int num_imm32_operand;
33027 int num_imm64_operand;
33029 if (INSN_CODE (insn) < 0)
33032 byte_len = min_insn_size (insn);
33033 path = get_insn_path (insn);
33034 group = get_insn_group (insn);
33035 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
33036 &num_imm64_operand);
33038 fprintf (file, " insn info:\n");
33039 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
33040 group_name[group], path, byte_len);
33041 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
33042 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
33045 /* Print to STDOUT the status of the ready list with respect to
33046 dispatch windows. */
33048 DEBUG_FUNCTION void
33049 debug_ready_dispatch (void)
33052 int no_ready = number_in_ready ();
33054 fprintf (stdout, "Number of ready: %d\n", no_ready);
33056 for (i = 0; i < no_ready; i++)
33057 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
33060 /* This routine is the driver of the dispatch scheduler. */
33063 do_dispatch (rtx insn, int mode)
33065 if (mode == DISPATCH_INIT)
33066 init_dispatch_sched ();
33067 else if (mode == ADD_TO_DISPATCH_WINDOW)
33068 add_to_dispatch_window (insn);
33071 /* Return TRUE if Dispatch Scheduling is supported. */
33074 has_dispatch (rtx insn, int action)
33076 if (ix86_tune == PROCESSOR_BDVER1 && flag_dispatch_scheduler)
33082 case IS_DISPATCH_ON:
33087 return is_cmp (insn);
33089 case DISPATCH_VIOLATION:
33090 return dispatch_violation ();
33092 case FITS_DISPATCH_WINDOW:
33093 return fits_dispatch_window (insn);
33099 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
33100 place emms and femms instructions. */
33102 static enum machine_mode
33103 ix86_preferred_simd_mode (enum machine_mode mode)
33105 /* Disable double precision vectorizer if needed. */
33106 if (mode == DFmode && !TARGET_VECTORIZE_DOUBLE)
33109 if (!TARGET_AVX && !TARGET_SSE)
33115 return TARGET_AVX ? V8SFmode : V4SFmode;
33117 return TARGET_AVX ? V4DFmode : V2DFmode;
33133 /* If AVX is enabled then try vectorizing with both 256bit and 128bit vectors. */
33136 static unsigned int
33137 ix86_autovectorize_vector_sizes (void)
33139 return TARGET_AVX ? 32 | 16 : 0;
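/* Worked example (annotation only): with -mavx this returns
   32 | 16 == 48, advertising both 32-byte and 16-byte vector sizes to
   the autovectorizer; without AVX it returns 0, leaving only the mode
   chosen by ix86_preferred_simd_mode.  */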
33142 /* Initialize the GCC target structure. */
33143 #undef TARGET_RETURN_IN_MEMORY
33144 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
33146 #undef TARGET_LEGITIMIZE_ADDRESS
33147 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
33149 #undef TARGET_ATTRIBUTE_TABLE
33150 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
33151 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33152 # undef TARGET_MERGE_DECL_ATTRIBUTES
33153 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
33156 #undef TARGET_COMP_TYPE_ATTRIBUTES
33157 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
33159 #undef TARGET_INIT_BUILTINS
33160 #define TARGET_INIT_BUILTINS ix86_init_builtins
33161 #undef TARGET_BUILTIN_DECL
33162 #define TARGET_BUILTIN_DECL ix86_builtin_decl
33163 #undef TARGET_EXPAND_BUILTIN
33164 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
33166 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
33167 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
33168 ix86_builtin_vectorized_function
33170 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
33171 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
33173 #undef TARGET_BUILTIN_RECIPROCAL
33174 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
33176 #undef TARGET_ASM_FUNCTION_EPILOGUE
33177 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
33179 #undef TARGET_ENCODE_SECTION_INFO
33180 #ifndef SUBTARGET_ENCODE_SECTION_INFO
33181 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
33183 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
33186 #undef TARGET_ASM_OPEN_PAREN
33187 #define TARGET_ASM_OPEN_PAREN ""
33188 #undef TARGET_ASM_CLOSE_PAREN
33189 #define TARGET_ASM_CLOSE_PAREN ""
33191 #undef TARGET_ASM_BYTE_OP
33192 #define TARGET_ASM_BYTE_OP ASM_BYTE
33194 #undef TARGET_ASM_ALIGNED_HI_OP
33195 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
33196 #undef TARGET_ASM_ALIGNED_SI_OP
33197 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
33199 #undef TARGET_ASM_ALIGNED_DI_OP
33200 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
33203 #undef TARGET_PROFILE_BEFORE_PROLOGUE
33204 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
33206 #undef TARGET_ASM_UNALIGNED_HI_OP
33207 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
33208 #undef TARGET_ASM_UNALIGNED_SI_OP
33209 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
33210 #undef TARGET_ASM_UNALIGNED_DI_OP
33211 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
33213 #undef TARGET_PRINT_OPERAND
33214 #define TARGET_PRINT_OPERAND ix86_print_operand
33215 #undef TARGET_PRINT_OPERAND_ADDRESS
33216 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
33217 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
33218 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
33219 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
33220 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
33222 #undef TARGET_SCHED_ADJUST_COST
33223 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
33224 #undef TARGET_SCHED_ISSUE_RATE
33225 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
33226 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
33227 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
33228 ia32_multipass_dfa_lookahead
33230 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
33231 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
33234 #undef TARGET_HAVE_TLS
33235 #define TARGET_HAVE_TLS true
33237 #undef TARGET_CANNOT_FORCE_CONST_MEM
33238 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
33239 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
33240 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
33242 #undef TARGET_DELEGITIMIZE_ADDRESS
33243 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
33245 #undef TARGET_MS_BITFIELD_LAYOUT_P
33246 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
33249 #undef TARGET_BINDS_LOCAL_P
33250 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
33252 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
33253 #undef TARGET_BINDS_LOCAL_P
33254 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
33257 #undef TARGET_ASM_OUTPUT_MI_THUNK
33258 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
33259 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
33260 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
33262 #undef TARGET_ASM_FILE_START
33263 #define TARGET_ASM_FILE_START x86_file_start
33265 #undef TARGET_DEFAULT_TARGET_FLAGS
33266 #define TARGET_DEFAULT_TARGET_FLAGS \
33268 | TARGET_SUBTARGET_DEFAULT \
33269 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
33272 #undef TARGET_HANDLE_OPTION
33273 #define TARGET_HANDLE_OPTION ix86_handle_option
33275 #undef TARGET_OPTION_OVERRIDE
33276 #define TARGET_OPTION_OVERRIDE ix86_option_override
33277 #undef TARGET_OPTION_OPTIMIZATION
33278 #define TARGET_OPTION_OPTIMIZATION ix86_option_optimization
33279 #undef TARGET_OPTION_INIT_STRUCT
33280 #define TARGET_OPTION_INIT_STRUCT ix86_option_init_struct
33282 #undef TARGET_REGISTER_MOVE_COST
33283 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
33284 #undef TARGET_MEMORY_MOVE_COST
33285 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
33286 #undef TARGET_RTX_COSTS
33287 #define TARGET_RTX_COSTS ix86_rtx_costs
33288 #undef TARGET_ADDRESS_COST
33289 #define TARGET_ADDRESS_COST ix86_address_cost
33291 #undef TARGET_FIXED_CONDITION_CODE_REGS
33292 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
33293 #undef TARGET_CC_MODES_COMPATIBLE
33294 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
33296 #undef TARGET_MACHINE_DEPENDENT_REORG
33297 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
33299 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
33300 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
33302 #undef TARGET_BUILD_BUILTIN_VA_LIST
33303 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
33305 #undef TARGET_ENUM_VA_LIST_P
33306 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
33308 #undef TARGET_FN_ABI_VA_LIST
33309 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
33311 #undef TARGET_CANONICAL_VA_LIST_TYPE
33312 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
33314 #undef TARGET_EXPAND_BUILTIN_VA_START
33315 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
33317 #undef TARGET_MD_ASM_CLOBBERS
33318 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
33320 #undef TARGET_PROMOTE_PROTOTYPES
33321 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
33322 #undef TARGET_STRUCT_VALUE_RTX
33323 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
33324 #undef TARGET_SETUP_INCOMING_VARARGS
33325 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
33326 #undef TARGET_MUST_PASS_IN_STACK
33327 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
33328 #undef TARGET_FUNCTION_ARG_ADVANCE
33329 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
33330 #undef TARGET_FUNCTION_ARG
33331 #define TARGET_FUNCTION_ARG ix86_function_arg
33332 #undef TARGET_PASS_BY_REFERENCE
33333 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
33334 #undef TARGET_INTERNAL_ARG_POINTER
33335 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
33336 #undef TARGET_UPDATE_STACK_BOUNDARY
33337 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
33338 #undef TARGET_GET_DRAP_RTX
33339 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
33340 #undef TARGET_STRICT_ARGUMENT_NAMING
33341 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
33342 #undef TARGET_STATIC_CHAIN
33343 #define TARGET_STATIC_CHAIN ix86_static_chain
33344 #undef TARGET_TRAMPOLINE_INIT
33345 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
33346 #undef TARGET_RETURN_POPS_ARGS
33347 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
33349 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
33350 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
33352 #undef TARGET_SCALAR_MODE_SUPPORTED_P
33353 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
33355 #undef TARGET_VECTOR_MODE_SUPPORTED_P
33356 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
33358 #undef TARGET_C_MODE_FOR_SUFFIX
33359 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
33362 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
33363 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
33366 #ifdef SUBTARGET_INSERT_ATTRIBUTES
33367 #undef TARGET_INSERT_ATTRIBUTES
33368 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
33371 #undef TARGET_MANGLE_TYPE
33372 #define TARGET_MANGLE_TYPE ix86_mangle_type
33374 #undef TARGET_STACK_PROTECT_FAIL
33375 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
33377 #undef TARGET_SUPPORTS_SPLIT_STACK
33378 #define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
33380 #undef TARGET_FUNCTION_VALUE
33381 #define TARGET_FUNCTION_VALUE ix86_function_value
33383 #undef TARGET_FUNCTION_VALUE_REGNO_P
33384 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
33386 #undef TARGET_SECONDARY_RELOAD
33387 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
33389 #undef TARGET_PREFERRED_RELOAD_CLASS
33390 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
33391 #undef TARGET_CLASS_LIKELY_SPILLED_P
33392 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
33394 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
33395 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
33396 ix86_builtin_vectorization_cost
33397 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
33398 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
33399 ix86_vectorize_builtin_vec_perm
33400 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
33401 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
33402 ix86_vectorize_builtin_vec_perm_ok
33403 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
33404 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
33405 ix86_preferred_simd_mode
33406 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
33407 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
33408 ix86_autovectorize_vector_sizes
33410 #undef TARGET_SET_CURRENT_FUNCTION
33411 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
33413 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
33414 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
33416 #undef TARGET_OPTION_SAVE
33417 #define TARGET_OPTION_SAVE ix86_function_specific_save
33419 #undef TARGET_OPTION_RESTORE
33420 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
33422 #undef TARGET_OPTION_PRINT
33423 #define TARGET_OPTION_PRINT ix86_function_specific_print
33425 #undef TARGET_CAN_INLINE_P
33426 #define TARGET_CAN_INLINE_P ix86_can_inline_p
33428 #undef TARGET_EXPAND_TO_RTL_HOOK
33429 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
33431 #undef TARGET_LEGITIMATE_ADDRESS_P
33432 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
33434 #undef TARGET_IRA_COVER_CLASSES
33435 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
33437 #undef TARGET_FRAME_POINTER_REQUIRED
33438 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
33440 #undef TARGET_CAN_ELIMINATE
33441 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
33443 #undef TARGET_EXTRA_LIVE_ON_ENTRY
33444 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
33446 #undef TARGET_ASM_CODE_END
33447 #define TARGET_ASM_CODE_END ix86_code_end
33449 struct gcc_target targetm = TARGET_INITIALIZER;
33451 #include "gt-i386.h"