1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
50 #include "tree-gimple.h"
53 #include "tm-constrs.h"
56 static int x86_builtin_vectorization_cost (bool);
57 static rtx legitimize_dllimport_symbol (rtx, bool);
59 #ifndef CHECK_STACK_LIMIT
60 #define CHECK_STACK_LIMIT (-1)
63 /* Return index of given mode in mult and division cost tables. */
64 #define MODE_INDEX(mode) \
65 ((mode) == QImode ? 0 \
66 : (mode) == HImode ? 1 \
67 : (mode) == SImode ? 2 \
68 : (mode) == DImode ? 3 \
71 /* Processor costs (relative to an add) */
72 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
73 #define COSTS_N_BYTES(N) ((N) * 2)
75 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78 struct processor_costs size_cost = { /* costs for tuning for size */
79 COSTS_N_BYTES (2), /* cost of an add instruction */
80 COSTS_N_BYTES (3), /* cost of a lea instruction */
81 COSTS_N_BYTES (2), /* variable shift costs */
82 COSTS_N_BYTES (3), /* constant shift costs */
83 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
84 COSTS_N_BYTES (3), /* HI */
85 COSTS_N_BYTES (3), /* SI */
86 COSTS_N_BYTES (3), /* DI */
87 COSTS_N_BYTES (5)}, /* other */
88 0, /* cost of multiply per each bit set */
89 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
90 COSTS_N_BYTES (3), /* HI */
91 COSTS_N_BYTES (3), /* SI */
92 COSTS_N_BYTES (3), /* DI */
93 COSTS_N_BYTES (5)}, /* other */
94 COSTS_N_BYTES (3), /* cost of movsx */
95 COSTS_N_BYTES (3), /* cost of movzx */
98 2, /* cost for loading QImode using movzbl */
99 {2, 2, 2}, /* cost of loading integer registers
100 in QImode, HImode and SImode.
101 Relative to reg-reg move (2). */
102 {2, 2, 2}, /* cost of storing integer registers */
103 2, /* cost of reg,reg fld/fst */
104 {2, 2, 2}, /* cost of loading fp registers
105 in SFmode, DFmode and XFmode */
106 {2, 2, 2}, /* cost of storing fp registers
107 in SFmode, DFmode and XFmode */
108 3, /* cost of moving MMX register */
109 {3, 3}, /* cost of loading MMX registers
110 in SImode and DImode */
111 {3, 3}, /* cost of storing MMX registers
112 in SImode and DImode */
113 3, /* cost of moving SSE register */
114 {3, 3, 3}, /* cost of loading SSE registers
115 in SImode, DImode and TImode */
116 {3, 3, 3}, /* cost of storing SSE registers
117 in SImode, DImode and TImode */
118 3, /* MMX or SSE register to integer */
119 0, /* size of l1 cache */
120 0, /* size of l2 cache */
121 0, /* size of prefetch block */
122 0, /* number of parallel prefetches */
124 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
125 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
126 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
127 COSTS_N_BYTES (2), /* cost of FABS instruction. */
128 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
129 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
130 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
131 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 1, /* scalar_stmt_cost. */
135 1, /* scalar load_cost. */
136 1, /* scalar_store_cost. */
137 1, /* vec_stmt_cost. */
138 1, /* vec_to_scalar_cost. */
139 1, /* scalar_to_vec_cost. */
140 1, /* vec_align_load_cost. */
141 1, /* vec_unalign_load_cost. */
142 1, /* vec_store_cost. */
143 1, /* cond_taken_branch_cost. */
144 1, /* cond_not_taken_branch_cost. */
147 /* Processor costs (relative to an add) */
149 struct processor_costs i386_cost = { /* 386 specific costs */
150 COSTS_N_INSNS (1), /* cost of an add instruction */
151 COSTS_N_INSNS (1), /* cost of a lea instruction */
152 COSTS_N_INSNS (3), /* variable shift costs */
153 COSTS_N_INSNS (2), /* constant shift costs */
154 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
155 COSTS_N_INSNS (6), /* HI */
156 COSTS_N_INSNS (6), /* SI */
157 COSTS_N_INSNS (6), /* DI */
158 COSTS_N_INSNS (6)}, /* other */
159 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
160 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
161 COSTS_N_INSNS (23), /* HI */
162 COSTS_N_INSNS (23), /* SI */
163 COSTS_N_INSNS (23), /* DI */
164 COSTS_N_INSNS (23)}, /* other */
165 COSTS_N_INSNS (3), /* cost of movsx */
166 COSTS_N_INSNS (2), /* cost of movzx */
167 15, /* "large" insn */
169 4, /* cost for loading QImode using movzbl */
170 {2, 4, 2}, /* cost of loading integer registers
171 in QImode, HImode and SImode.
172 Relative to reg-reg move (2). */
173 {2, 4, 2}, /* cost of storing integer registers */
174 2, /* cost of reg,reg fld/fst */
175 {8, 8, 8}, /* cost of loading fp registers
176 in SFmode, DFmode and XFmode */
177 {8, 8, 8}, /* cost of storing fp registers
178 in SFmode, DFmode and XFmode */
179 2, /* cost of moving MMX register */
180 {4, 8}, /* cost of loading MMX registers
181 in SImode and DImode */
182 {4, 8}, /* cost of storing MMX registers
183 in SImode and DImode */
184 2, /* cost of moving SSE register */
185 {4, 8, 16}, /* cost of loading SSE registers
186 in SImode, DImode and TImode */
187 {4, 8, 16}, /* cost of storing SSE registers
188 in SImode, DImode and TImode */
189 3, /* MMX or SSE register to integer */
190 0, /* size of l1 cache */
191 0, /* size of l2 cache */
192 0, /* size of prefetch block */
193 0, /* number of parallel prefetches */
195 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
196 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
197 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
198 COSTS_N_INSNS (22), /* cost of FABS instruction. */
199 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
200 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
201 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
202 DUMMY_STRINGOP_ALGS},
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 1, /* scalar_stmt_cost. */
206 1, /* scalar load_cost. */
207 1, /* scalar_store_cost. */
208 1, /* vec_stmt_cost. */
209 1, /* vec_to_scalar_cost. */
210 1, /* scalar_to_vec_cost. */
211 1, /* vec_align_load_cost. */
212 2, /* vec_unalign_load_cost. */
213 1, /* vec_store_cost. */
214 3, /* cond_taken_branch_cost. */
215 1, /* cond_not_taken_branch_cost. */
219 struct processor_costs i486_cost = { /* 486 specific costs */
220 COSTS_N_INSNS (1), /* cost of an add instruction */
221 COSTS_N_INSNS (1), /* cost of a lea instruction */
222 COSTS_N_INSNS (3), /* variable shift costs */
223 COSTS_N_INSNS (2), /* constant shift costs */
224 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
225 COSTS_N_INSNS (12), /* HI */
226 COSTS_N_INSNS (12), /* SI */
227 COSTS_N_INSNS (12), /* DI */
228 COSTS_N_INSNS (12)}, /* other */
229 1, /* cost of multiply per each bit set */
230 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
231 COSTS_N_INSNS (40), /* HI */
232 COSTS_N_INSNS (40), /* SI */
233 COSTS_N_INSNS (40), /* DI */
234 COSTS_N_INSNS (40)}, /* other */
235 COSTS_N_INSNS (3), /* cost of movsx */
236 COSTS_N_INSNS (2), /* cost of movzx */
237 15, /* "large" insn */
239 4, /* cost for loading QImode using movzbl */
240 {2, 4, 2}, /* cost of loading integer registers
241 in QImode, HImode and SImode.
242 Relative to reg-reg move (2). */
243 {2, 4, 2}, /* cost of storing integer registers */
244 2, /* cost of reg,reg fld/fst */
245 {8, 8, 8}, /* cost of loading fp registers
246 in SFmode, DFmode and XFmode */
247 {8, 8, 8}, /* cost of storing fp registers
248 in SFmode, DFmode and XFmode */
249 2, /* cost of moving MMX register */
250 {4, 8}, /* cost of loading MMX registers
251 in SImode and DImode */
252 {4, 8}, /* cost of storing MMX registers
253 in SImode and DImode */
254 2, /* cost of moving SSE register */
255 {4, 8, 16}, /* cost of loading SSE registers
256 in SImode, DImode and TImode */
257 {4, 8, 16}, /* cost of storing SSE registers
258 in SImode, DImode and TImode */
259 3, /* MMX or SSE register to integer */
260 4, /* size of l1 cache. 486 has 8kB cache
261 shared for code and data, so 4kB is
262 not really precise. */
263 4, /* size of l2 cache */
264 0, /* size of prefetch block */
265 0, /* number of parallel prefetches */
267 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
268 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
269 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
270 COSTS_N_INSNS (3), /* cost of FABS instruction. */
271 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
272 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
273 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
274 DUMMY_STRINGOP_ALGS},
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 1, /* scalar_stmt_cost. */
278 1, /* scalar load_cost. */
279 1, /* scalar_store_cost. */
280 1, /* vec_stmt_cost. */
281 1, /* vec_to_scalar_cost. */
282 1, /* scalar_to_vec_cost. */
283 1, /* vec_align_load_cost. */
284 2, /* vec_unalign_load_cost. */
285 1, /* vec_store_cost. */
286 3, /* cond_taken_branch_cost. */
287 1, /* cond_not_taken_branch_cost. */
291 struct processor_costs pentium_cost = {
292 COSTS_N_INSNS (1), /* cost of an add instruction */
293 COSTS_N_INSNS (1), /* cost of a lea instruction */
294 COSTS_N_INSNS (4), /* variable shift costs */
295 COSTS_N_INSNS (1), /* constant shift costs */
296 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
297 COSTS_N_INSNS (11), /* HI */
298 COSTS_N_INSNS (11), /* SI */
299 COSTS_N_INSNS (11), /* DI */
300 COSTS_N_INSNS (11)}, /* other */
301 0, /* cost of multiply per each bit set */
302 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
303 COSTS_N_INSNS (25), /* HI */
304 COSTS_N_INSNS (25), /* SI */
305 COSTS_N_INSNS (25), /* DI */
306 COSTS_N_INSNS (25)}, /* other */
307 COSTS_N_INSNS (3), /* cost of movsx */
308 COSTS_N_INSNS (2), /* cost of movzx */
309 8, /* "large" insn */
311 6, /* cost for loading QImode using movzbl */
312 {2, 4, 2}, /* cost of loading integer registers
313 in QImode, HImode and SImode.
314 Relative to reg-reg move (2). */
315 {2, 4, 2}, /* cost of storing integer registers */
316 2, /* cost of reg,reg fld/fst */
317 {2, 2, 6}, /* cost of loading fp registers
318 in SFmode, DFmode and XFmode */
319 {4, 4, 6}, /* cost of storing fp registers
320 in SFmode, DFmode and XFmode */
321 8, /* cost of moving MMX register */
322 {8, 8}, /* cost of loading MMX registers
323 in SImode and DImode */
324 {8, 8}, /* cost of storing MMX registers
325 in SImode and DImode */
326 2, /* cost of moving SSE register */
327 {4, 8, 16}, /* cost of loading SSE registers
328 in SImode, DImode and TImode */
329 {4, 8, 16}, /* cost of storing SSE registers
330 in SImode, DImode and TImode */
331 3, /* MMX or SSE register to integer */
332 8, /* size of l1 cache. */
333 8, /* size of l2 cache */
334 0, /* size of prefetch block */
335 0, /* number of parallel prefetches */
337 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
338 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
339 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
340 COSTS_N_INSNS (1), /* cost of FABS instruction. */
341 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
342 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
343 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
344 DUMMY_STRINGOP_ALGS},
345 {{libcall, {{-1, rep_prefix_4_byte}}},
346 DUMMY_STRINGOP_ALGS},
347 1, /* scalar_stmt_cost. */
348 1, /* scalar load_cost. */
349 1, /* scalar_store_cost. */
350 1, /* vec_stmt_cost. */
351 1, /* vec_to_scalar_cost. */
352 1, /* scalar_to_vec_cost. */
353 1, /* vec_align_load_cost. */
354 2, /* vec_unalign_load_cost. */
355 1, /* vec_store_cost. */
356 3, /* cond_taken_branch_cost. */
357 1, /* cond_not_taken_branch_cost. */
361 struct processor_costs pentiumpro_cost = {
362 COSTS_N_INSNS (1), /* cost of an add instruction */
363 COSTS_N_INSNS (1), /* cost of a lea instruction */
364 COSTS_N_INSNS (1), /* variable shift costs */
365 COSTS_N_INSNS (1), /* constant shift costs */
366 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
367 COSTS_N_INSNS (4), /* HI */
368 COSTS_N_INSNS (4), /* SI */
369 COSTS_N_INSNS (4), /* DI */
370 COSTS_N_INSNS (4)}, /* other */
371 0, /* cost of multiply per each bit set */
372 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
373 COSTS_N_INSNS (17), /* HI */
374 COSTS_N_INSNS (17), /* SI */
375 COSTS_N_INSNS (17), /* DI */
376 COSTS_N_INSNS (17)}, /* other */
377 COSTS_N_INSNS (1), /* cost of movsx */
378 COSTS_N_INSNS (1), /* cost of movzx */
379 8, /* "large" insn */
381 2, /* cost for loading QImode using movzbl */
382 {4, 4, 4}, /* cost of loading integer registers
383 in QImode, HImode and SImode.
384 Relative to reg-reg move (2). */
385 {2, 2, 2}, /* cost of storing integer registers */
386 2, /* cost of reg,reg fld/fst */
387 {2, 2, 6}, /* cost of loading fp registers
388 in SFmode, DFmode and XFmode */
389 {4, 4, 6}, /* cost of storing fp registers
390 in SFmode, DFmode and XFmode */
391 2, /* cost of moving MMX register */
392 {2, 2}, /* cost of loading MMX registers
393 in SImode and DImode */
394 {2, 2}, /* cost of storing MMX registers
395 in SImode and DImode */
396 2, /* cost of moving SSE register */
397 {2, 2, 8}, /* cost of loading SSE registers
398 in SImode, DImode and TImode */
399 {2, 2, 8}, /* cost of storing SSE registers
400 in SImode, DImode and TImode */
401 3, /* MMX or SSE register to integer */
402 8, /* size of l1 cache. */
403 256, /* size of l2 cache */
404 32, /* size of prefetch block */
405 6, /* number of parallel prefetches */
407 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
408 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
409 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
410 COSTS_N_INSNS (2), /* cost of FABS instruction. */
411 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
412 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
414 the alignment). For small blocks inline loop is still a noticeable win, for bigger
415 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
416 more expensive startup time in CPU, but after 4K the difference is down in the noise.
418 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
419 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
420 DUMMY_STRINGOP_ALGS},
421 {{rep_prefix_4_byte, {{1024, unrolled_loop},
422 {8192, rep_prefix_4_byte}, {-1, libcall}}},
423 DUMMY_STRINGOP_ALGS},
424 1, /* scalar_stmt_cost. */
425 1, /* scalar load_cost. */
426 1, /* scalar_store_cost. */
427 1, /* vec_stmt_cost. */
428 1, /* vec_to_scalar_cost. */
429 1, /* scalar_to_vec_cost. */
430 1, /* vec_align_load_cost. */
431 2, /* vec_unalign_load_cost. */
432 1, /* vec_store_cost. */
433 3, /* cond_taken_branch_cost. */
434 1, /* cond_not_taken_branch_cost. */
438 struct processor_costs geode_cost = {
439 COSTS_N_INSNS (1), /* cost of an add instruction */
440 COSTS_N_INSNS (1), /* cost of a lea instruction */
441 COSTS_N_INSNS (2), /* variable shift costs */
442 COSTS_N_INSNS (1), /* constant shift costs */
443 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
444 COSTS_N_INSNS (4), /* HI */
445 COSTS_N_INSNS (7), /* SI */
446 COSTS_N_INSNS (7), /* DI */
447 COSTS_N_INSNS (7)}, /* other */
448 0, /* cost of multiply per each bit set */
449 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
450 COSTS_N_INSNS (23), /* HI */
451 COSTS_N_INSNS (39), /* SI */
452 COSTS_N_INSNS (39), /* DI */
453 COSTS_N_INSNS (39)}, /* other */
454 COSTS_N_INSNS (1), /* cost of movsx */
455 COSTS_N_INSNS (1), /* cost of movzx */
456 8, /* "large" insn */
458 1, /* cost for loading QImode using movzbl */
459 {1, 1, 1}, /* cost of loading integer registers
460 in QImode, HImode and SImode.
461 Relative to reg-reg move (2). */
462 {1, 1, 1}, /* cost of storing integer registers */
463 1, /* cost of reg,reg fld/fst */
464 {1, 1, 1}, /* cost of loading fp registers
465 in SFmode, DFmode and XFmode */
466 {4, 6, 6}, /* cost of storing fp registers
467 in SFmode, DFmode and XFmode */
469 1, /* cost of moving MMX register */
470 {1, 1}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {1, 1}, /* cost of storing MMX registers
473 in SImode and DImode */
474 1, /* cost of moving SSE register */
475 {1, 1, 1}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {1, 1, 1}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 1, /* MMX or SSE register to integer */
480 64, /* size of l1 cache. */
481 128, /* size of l2 cache. */
482 32, /* size of prefetch block */
483 1, /* number of parallel prefetches */
485 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
486 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
487 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
488 COSTS_N_INSNS (1), /* cost of FABS instruction. */
489 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
490 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
491 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
492 DUMMY_STRINGOP_ALGS},
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 1, /* scalar_stmt_cost. */
496 1, /* scalar load_cost. */
497 1, /* scalar_store_cost. */
498 1, /* vec_stmt_cost. */
499 1, /* vec_to_scalar_cost. */
500 1, /* scalar_to_vec_cost. */
501 1, /* vec_align_load_cost. */
502 2, /* vec_unalign_load_cost. */
503 1, /* vec_store_cost. */
504 3, /* cond_taken_branch_cost. */
505 1, /* cond_not_taken_branch_cost. */
509 struct processor_costs k6_cost = {
510 COSTS_N_INSNS (1), /* cost of an add instruction */
511 COSTS_N_INSNS (2), /* cost of a lea instruction */
512 COSTS_N_INSNS (1), /* variable shift costs */
513 COSTS_N_INSNS (1), /* constant shift costs */
514 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
515 COSTS_N_INSNS (3), /* HI */
516 COSTS_N_INSNS (3), /* SI */
517 COSTS_N_INSNS (3), /* DI */
518 COSTS_N_INSNS (3)}, /* other */
519 0, /* cost of multiply per each bit set */
520 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
521 COSTS_N_INSNS (18), /* HI */
522 COSTS_N_INSNS (18), /* SI */
523 COSTS_N_INSNS (18), /* DI */
524 COSTS_N_INSNS (18)}, /* other */
525 COSTS_N_INSNS (2), /* cost of movsx */
526 COSTS_N_INSNS (2), /* cost of movzx */
527 8, /* "large" insn */
529 3, /* cost for loading QImode using movzbl */
530 {4, 5, 4}, /* cost of loading integer registers
531 in QImode, HImode and SImode.
532 Relative to reg-reg move (2). */
533 {2, 3, 2}, /* cost of storing integer registers */
534 4, /* cost of reg,reg fld/fst */
535 {6, 6, 6}, /* cost of loading fp registers
536 in SFmode, DFmode and XFmode */
537 {4, 4, 4}, /* cost of storing fp registers
538 in SFmode, DFmode and XFmode */
539 2, /* cost of moving MMX register */
540 {2, 2}, /* cost of loading MMX registers
541 in SImode and DImode */
542 {2, 2}, /* cost of storing MMX registers
543 in SImode and DImode */
544 2, /* cost of moving SSE register */
545 {2, 2, 8}, /* cost of loading SSE registers
546 in SImode, DImode and TImode */
547 {2, 2, 8}, /* cost of storing SSE registers
548 in SImode, DImode and TImode */
549 6, /* MMX or SSE register to integer */
550 32, /* size of l1 cache. */
551 32, /* size of l2 cache. Some models
552 have integrated l2 cache, but
553 optimizing for k6 is not important
554 enough to worry about that. */
555 32, /* size of prefetch block */
556 1, /* number of parallel prefetches */
558 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
559 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
560 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
561 COSTS_N_INSNS (2), /* cost of FABS instruction. */
562 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
563 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
564 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
565 DUMMY_STRINGOP_ALGS},
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 1, /* scalar_stmt_cost. */
569 1, /* scalar load_cost. */
570 1, /* scalar_store_cost. */
571 1, /* vec_stmt_cost. */
572 1, /* vec_to_scalar_cost. */
573 1, /* scalar_to_vec_cost. */
574 1, /* vec_align_load_cost. */
575 2, /* vec_unalign_load_cost. */
576 1, /* vec_store_cost. */
577 3, /* cond_taken_branch_cost. */
578 1, /* cond_not_taken_branch_cost. */
582 struct processor_costs athlon_cost = {
583 COSTS_N_INSNS (1), /* cost of an add instruction */
584 COSTS_N_INSNS (2), /* cost of a lea instruction */
585 COSTS_N_INSNS (1), /* variable shift costs */
586 COSTS_N_INSNS (1), /* constant shift costs */
587 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
588 COSTS_N_INSNS (5), /* HI */
589 COSTS_N_INSNS (5), /* SI */
590 COSTS_N_INSNS (5), /* DI */
591 COSTS_N_INSNS (5)}, /* other */
592 0, /* cost of multiply per each bit set */
593 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
594 COSTS_N_INSNS (26), /* HI */
595 COSTS_N_INSNS (42), /* SI */
596 COSTS_N_INSNS (74), /* DI */
597 COSTS_N_INSNS (74)}, /* other */
598 COSTS_N_INSNS (1), /* cost of movsx */
599 COSTS_N_INSNS (1), /* cost of movzx */
600 8, /* "large" insn */
602 4, /* cost for loading QImode using movzbl */
603 {3, 4, 3}, /* cost of loading integer registers
604 in QImode, HImode and SImode.
605 Relative to reg-reg move (2). */
606 {3, 4, 3}, /* cost of storing integer registers */
607 4, /* cost of reg,reg fld/fst */
608 {4, 4, 12}, /* cost of loading fp registers
609 in SFmode, DFmode and XFmode */
610 {6, 6, 8}, /* cost of storing fp registers
611 in SFmode, DFmode and XFmode */
612 2, /* cost of moving MMX register */
613 {4, 4}, /* cost of loading MMX registers
614 in SImode and DImode */
615 {4, 4}, /* cost of storing MMX registers
616 in SImode and DImode */
617 2, /* cost of moving SSE register */
618 {4, 4, 6}, /* cost of loading SSE registers
619 in SImode, DImode and TImode */
620 {4, 4, 5}, /* cost of storing SSE registers
621 in SImode, DImode and TImode */
622 5, /* MMX or SSE register to integer */
623 64, /* size of l1 cache. */
624 256, /* size of l2 cache. */
625 64, /* size of prefetch block */
626 6, /* number of parallel prefetches */
628 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
629 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
630 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
631 COSTS_N_INSNS (2), /* cost of FABS instruction. */
632 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
633 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
634 /* For some reason, Athlon deals better with REP prefix (relative to loops)
635 compared to K8. Alignment becomes important after 8 bytes for memcpy and
636 128 bytes for memset. */
637 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
638 DUMMY_STRINGOP_ALGS},
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 1, /* scalar_stmt_cost. */
642 1, /* scalar load_cost. */
643 1, /* scalar_store_cost. */
644 1, /* vec_stmt_cost. */
645 1, /* vec_to_scalar_cost. */
646 1, /* scalar_to_vec_cost. */
647 1, /* vec_align_load_cost. */
648 2, /* vec_unalign_load_cost. */
649 1, /* vec_store_cost. */
650 3, /* cond_taken_branch_cost. */
651 1, /* cond_not_taken_branch_cost. */
655 struct processor_costs k8_cost = {
656 COSTS_N_INSNS (1), /* cost of an add instruction */
657 COSTS_N_INSNS (2), /* cost of a lea instruction */
658 COSTS_N_INSNS (1), /* variable shift costs */
659 COSTS_N_INSNS (1), /* constant shift costs */
660 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
661 COSTS_N_INSNS (4), /* HI */
662 COSTS_N_INSNS (3), /* SI */
663 COSTS_N_INSNS (4), /* DI */
664 COSTS_N_INSNS (5)}, /* other */
665 0, /* cost of multiply per each bit set */
666 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
667 COSTS_N_INSNS (26), /* HI */
668 COSTS_N_INSNS (42), /* SI */
669 COSTS_N_INSNS (74), /* DI */
670 COSTS_N_INSNS (74)}, /* other */
671 COSTS_N_INSNS (1), /* cost of movsx */
672 COSTS_N_INSNS (1), /* cost of movzx */
673 8, /* "large" insn */
675 4, /* cost for loading QImode using movzbl */
676 {3, 4, 3}, /* cost of loading integer registers
677 in QImode, HImode and SImode.
678 Relative to reg-reg move (2). */
679 {3, 4, 3}, /* cost of storing integer registers */
680 4, /* cost of reg,reg fld/fst */
681 {4, 4, 12}, /* cost of loading fp registers
682 in SFmode, DFmode and XFmode */
683 {6, 6, 8}, /* cost of storing fp registers
684 in SFmode, DFmode and XFmode */
685 2, /* cost of moving MMX register */
686 {3, 3}, /* cost of loading MMX registers
687 in SImode and DImode */
688 {4, 4}, /* cost of storing MMX registers
689 in SImode and DImode */
690 2, /* cost of moving SSE register */
691 {4, 3, 6}, /* cost of loading SSE registers
692 in SImode, DImode and TImode */
693 {4, 4, 5}, /* cost of storing SSE registers
694 in SImode, DImode and TImode */
695 5, /* MMX or SSE register to integer */
696 64, /* size of l1 cache. */
697 512, /* size of l2 cache. */
698 64, /* size of prefetch block */
699 /* New AMD processors never drop prefetches; if they cannot be performed
700 immediately, they are queued. We set number of simultaneous prefetches
701 to a large constant to reflect this (it probably is not a good idea not
702 to limit number of prefetches at all, as their execution also takes some
704 100, /* number of parallel prefetches */
706 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
707 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
708 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
709 COSTS_N_INSNS (2), /* cost of FABS instruction. */
710 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
711 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
712 /* K8 has optimized REP instruction for medium sized blocks, but for very small
713 blocks it is better to use loop. For large blocks, libcall can do
714 nontemporary accesses and beat inline considerably. */
715 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
716 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
717 {{libcall, {{8, loop}, {24, unrolled_loop},
718 {2048, rep_prefix_4_byte}, {-1, libcall}}},
719 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
720 4, /* scalar_stmt_cost. */
721 2, /* scalar load_cost. */
722 2, /* scalar_store_cost. */
723 5, /* vec_stmt_cost. */
724 0, /* vec_to_scalar_cost. */
725 2, /* scalar_to_vec_cost. */
726 2, /* vec_align_load_cost. */
727 3, /* vec_unalign_load_cost. */
728 3, /* vec_store_cost. */
729 3, /* cond_taken_branch_cost. */
730 2, /* cond_not_taken_branch_cost. */
733 struct processor_costs amdfam10_cost = {
734 COSTS_N_INSNS (1), /* cost of an add instruction */
735 COSTS_N_INSNS (2), /* cost of a lea instruction */
736 COSTS_N_INSNS (1), /* variable shift costs */
737 COSTS_N_INSNS (1), /* constant shift costs */
738 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
739 COSTS_N_INSNS (4), /* HI */
740 COSTS_N_INSNS (3), /* SI */
741 COSTS_N_INSNS (4), /* DI */
742 COSTS_N_INSNS (5)}, /* other */
743 0, /* cost of multiply per each bit set */
744 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
745 COSTS_N_INSNS (35), /* HI */
746 COSTS_N_INSNS (51), /* SI */
747 COSTS_N_INSNS (83), /* DI */
748 COSTS_N_INSNS (83)}, /* other */
749 COSTS_N_INSNS (1), /* cost of movsx */
750 COSTS_N_INSNS (1), /* cost of movzx */
751 8, /* "large" insn */
753 4, /* cost for loading QImode using movzbl */
754 {3, 4, 3}, /* cost of loading integer registers
755 in QImode, HImode and SImode.
756 Relative to reg-reg move (2). */
757 {3, 4, 3}, /* cost of storing integer registers */
758 4, /* cost of reg,reg fld/fst */
759 {4, 4, 12}, /* cost of loading fp registers
760 in SFmode, DFmode and XFmode */
761 {6, 6, 8}, /* cost of storing fp registers
762 in SFmode, DFmode and XFmode */
763 2, /* cost of moving MMX register */
764 {3, 3}, /* cost of loading MMX registers
765 in SImode and DImode */
766 {4, 4}, /* cost of storing MMX registers
767 in SImode and DImode */
768 2, /* cost of moving SSE register */
769 {4, 4, 3}, /* cost of loading SSE registers
770 in SImode, DImode and TImode */
771 {4, 4, 5}, /* cost of storing SSE registers
772 in SImode, DImode and TImode */
773 3, /* MMX or SSE register to integer */
775 MOVD reg64, xmmreg Double FSTORE 4
776 MOVD reg32, xmmreg Double FSTORE 4
778 MOVD reg64, xmmreg Double FADD 3
780 MOVD reg32, xmmreg Double FADD 3
782 64, /* size of l1 cache. */
783 512, /* size of l2 cache. */
784 64, /* size of prefetch block */
785 /* New AMD processors never drop prefetches; if they cannot be performed
786 immediately, they are queued. We set number of simultaneous prefetches
787 to a large constant to reflect this (it probably is not a good idea not
788 to limit number of prefetches at all, as their execution also takes some
790 100, /* number of parallel prefetches */
792 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
793 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
794 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
795 COSTS_N_INSNS (2), /* cost of FABS instruction. */
796 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
797 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
799 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
800 very small blocks it is better to use loop. For large blocks, libcall can
801 do nontemporary accesses and beat inline considerably. */
802 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
803 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
804 {{libcall, {{8, loop}, {24, unrolled_loop},
805 {2048, rep_prefix_4_byte}, {-1, libcall}}},
806 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
807 4, /* scalar_stmt_cost. */
808 2, /* scalar load_cost. */
809 2, /* scalar_store_cost. */
810 6, /* vec_stmt_cost. */
811 0, /* vec_to_scalar_cost. */
812 2, /* scalar_to_vec_cost. */
813 2, /* vec_align_load_cost. */
814 2, /* vec_unalign_load_cost. */
815 2, /* vec_store_cost. */
816 2, /* cond_taken_branch_cost. */
817 1, /* cond_not_taken_branch_cost. */
821 struct processor_costs pentium4_cost = {
822 COSTS_N_INSNS (1), /* cost of an add instruction */
823 COSTS_N_INSNS (3), /* cost of a lea instruction */
824 COSTS_N_INSNS (4), /* variable shift costs */
825 COSTS_N_INSNS (4), /* constant shift costs */
826 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
827 COSTS_N_INSNS (15), /* HI */
828 COSTS_N_INSNS (15), /* SI */
829 COSTS_N_INSNS (15), /* DI */
830 COSTS_N_INSNS (15)}, /* other */
831 0, /* cost of multiply per each bit set */
832 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
833 COSTS_N_INSNS (56), /* HI */
834 COSTS_N_INSNS (56), /* SI */
835 COSTS_N_INSNS (56), /* DI */
836 COSTS_N_INSNS (56)}, /* other */
837 COSTS_N_INSNS (1), /* cost of movsx */
838 COSTS_N_INSNS (1), /* cost of movzx */
839 16, /* "large" insn */
841 2, /* cost for loading QImode using movzbl */
842 {4, 5, 4}, /* cost of loading integer registers
843 in QImode, HImode and SImode.
844 Relative to reg-reg move (2). */
845 {2, 3, 2}, /* cost of storing integer registers */
846 2, /* cost of reg,reg fld/fst */
847 {2, 2, 6}, /* cost of loading fp registers
848 in SFmode, DFmode and XFmode */
849 {4, 4, 6}, /* cost of storing fp registers
850 in SFmode, DFmode and XFmode */
851 2, /* cost of moving MMX register */
852 {2, 2}, /* cost of loading MMX registers
853 in SImode and DImode */
854 {2, 2}, /* cost of storing MMX registers
855 in SImode and DImode */
856 12, /* cost of moving SSE register */
857 {12, 12, 12}, /* cost of loading SSE registers
858 in SImode, DImode and TImode */
859 {2, 2, 8}, /* cost of storing SSE registers
860 in SImode, DImode and TImode */
861 10, /* MMX or SSE register to integer */
862 8, /* size of l1 cache. */
863 256, /* size of l2 cache. */
864 64, /* size of prefetch block */
865 6, /* number of parallel prefetches */
867 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
868 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
869 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
870 COSTS_N_INSNS (2), /* cost of FABS instruction. */
871 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
872 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
873 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
874 DUMMY_STRINGOP_ALGS},
875 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
877 DUMMY_STRINGOP_ALGS},
878 1, /* scalar_stmt_cost. */
879 1, /* scalar load_cost. */
880 1, /* scalar_store_cost. */
881 1, /* vec_stmt_cost. */
882 1, /* vec_to_scalar_cost. */
883 1, /* scalar_to_vec_cost. */
884 1, /* vec_align_load_cost. */
885 2, /* vec_unalign_load_cost. */
886 1, /* vec_store_cost. */
887 3, /* cond_taken_branch_cost. */
888 1, /* cond_not_taken_branch_cost. */
892 struct processor_costs nocona_cost = {
893 COSTS_N_INSNS (1), /* cost of an add instruction */
894 COSTS_N_INSNS (1), /* cost of a lea instruction */
895 COSTS_N_INSNS (1), /* variable shift costs */
896 COSTS_N_INSNS (1), /* constant shift costs */
897 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
898 COSTS_N_INSNS (10), /* HI */
899 COSTS_N_INSNS (10), /* SI */
900 COSTS_N_INSNS (10), /* DI */
901 COSTS_N_INSNS (10)}, /* other */
902 0, /* cost of multiply per each bit set */
903 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
904 COSTS_N_INSNS (66), /* HI */
905 COSTS_N_INSNS (66), /* SI */
906 COSTS_N_INSNS (66), /* DI */
907 COSTS_N_INSNS (66)}, /* other */
908 COSTS_N_INSNS (1), /* cost of movsx */
909 COSTS_N_INSNS (1), /* cost of movzx */
910 16, /* "large" insn */
912 4, /* cost for loading QImode using movzbl */
913 {4, 4, 4}, /* cost of loading integer registers
914 in QImode, HImode and SImode.
915 Relative to reg-reg move (2). */
916 {4, 4, 4}, /* cost of storing integer registers */
917 3, /* cost of reg,reg fld/fst */
918 {12, 12, 12}, /* cost of loading fp registers
919 in SFmode, DFmode and XFmode */
920 {4, 4, 4}, /* cost of storing fp registers
921 in SFmode, DFmode and XFmode */
922 6, /* cost of moving MMX register */
923 {12, 12}, /* cost of loading MMX registers
924 in SImode and DImode */
925 {12, 12}, /* cost of storing MMX registers
926 in SImode and DImode */
927 6, /* cost of moving SSE register */
928 {12, 12, 12}, /* cost of loading SSE registers
929 in SImode, DImode and TImode */
930 {12, 12, 12}, /* cost of storing SSE registers
931 in SImode, DImode and TImode */
932 8, /* MMX or SSE register to integer */
933 8, /* size of l1 cache. */
934 1024, /* size of l2 cache. */
935 128, /* size of prefetch block */
936 8, /* number of parallel prefetches */
938 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
939 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
940 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
941 COSTS_N_INSNS (3), /* cost of FABS instruction. */
942 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
943 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
944 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
945 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
946 {100000, unrolled_loop}, {-1, libcall}}}},
947 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
949 {libcall, {{24, loop}, {64, unrolled_loop},
950 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
951 1, /* scalar_stmt_cost. */
952 1, /* scalar load_cost. */
953 1, /* scalar_store_cost. */
954 1, /* vec_stmt_cost. */
955 1, /* vec_to_scalar_cost. */
956 1, /* scalar_to_vec_cost. */
957 1, /* vec_align_load_cost. */
958 2, /* vec_unalign_load_cost. */
959 1, /* vec_store_cost. */
960 3, /* cond_taken_branch_cost. */
961 1, /* cond_not_taken_branch_cost. */
965 struct processor_costs core2_cost = {
966 COSTS_N_INSNS (1), /* cost of an add instruction */
967 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
968 COSTS_N_INSNS (1), /* variable shift costs */
969 COSTS_N_INSNS (1), /* constant shift costs */
970 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
971 COSTS_N_INSNS (3), /* HI */
972 COSTS_N_INSNS (3), /* SI */
973 COSTS_N_INSNS (3), /* DI */
974 COSTS_N_INSNS (3)}, /* other */
975 0, /* cost of multiply per each bit set */
976 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
977 COSTS_N_INSNS (22), /* HI */
978 COSTS_N_INSNS (22), /* SI */
979 COSTS_N_INSNS (22), /* DI */
980 COSTS_N_INSNS (22)}, /* other */
981 COSTS_N_INSNS (1), /* cost of movsx */
982 COSTS_N_INSNS (1), /* cost of movzx */
983 8, /* "large" insn */
985 2, /* cost for loading QImode using movzbl */
986 {6, 6, 6}, /* cost of loading integer registers
987 in QImode, HImode and SImode.
988 Relative to reg-reg move (2). */
989 {4, 4, 4}, /* cost of storing integer registers */
990 2, /* cost of reg,reg fld/fst */
991 {6, 6, 6}, /* cost of loading fp registers
992 in SFmode, DFmode and XFmode */
993 {4, 4, 4}, /* cost of loading integer registers */
994 2, /* cost of moving MMX register */
995 {6, 6}, /* cost of loading MMX registers
996 in SImode and DImode */
997 {4, 4}, /* cost of storing MMX registers
998 in SImode and DImode */
999 2, /* cost of moving SSE register */
1000 {6, 6, 6}, /* cost of loading SSE registers
1001 in SImode, DImode and TImode */
1002 {4, 4, 4}, /* cost of storing SSE registers
1003 in SImode, DImode and TImode */
1004 2, /* MMX or SSE register to integer */
1005 32, /* size of l1 cache. */
1006 2048, /* size of l2 cache. */
1007 128, /* size of prefetch block */
1008 8, /* number of parallel prefetches */
1009 3, /* Branch cost */
1010 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1011 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1012 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1013 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1014 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1015 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1016 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1017 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1018 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1019 {{libcall, {{8, loop}, {15, unrolled_loop},
1020 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1021 {libcall, {{24, loop}, {32, unrolled_loop},
1022 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1023 1, /* scalar_stmt_cost. */
1024 1, /* scalar load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 2, /* vec_unalign_load_cost. */
1031 1, /* vec_store_cost. */
1032 3, /* cond_taken_branch_cost. */
1033 1, /* cond_not_taken_branch_cost. */
1036 /* Generic64 should produce code tuned for Nocona and K8. */
1038 struct processor_costs generic64_cost = {
1039 COSTS_N_INSNS (1), /* cost of an add instruction */
1040 /* On all chips taken into consideration lea is 2 cycles and more. With
1041 this cost however our current implementation of synth_mult results in
1042 use of unnecessary temporary registers causing regression on several
1043 SPECfp benchmarks. */
1044 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1045 COSTS_N_INSNS (1), /* variable shift costs */
1046 COSTS_N_INSNS (1), /* constant shift costs */
1047 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1048 COSTS_N_INSNS (4), /* HI */
1049 COSTS_N_INSNS (3), /* SI */
1050 COSTS_N_INSNS (4), /* DI */
1051 COSTS_N_INSNS (2)}, /* other */
1052 0, /* cost of multiply per each bit set */
1053 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1054 COSTS_N_INSNS (26), /* HI */
1055 COSTS_N_INSNS (42), /* SI */
1056 COSTS_N_INSNS (74), /* DI */
1057 COSTS_N_INSNS (74)}, /* other */
1058 COSTS_N_INSNS (1), /* cost of movsx */
1059 COSTS_N_INSNS (1), /* cost of movzx */
1060 8, /* "large" insn */
1061 17, /* MOVE_RATIO */
1062 4, /* cost for loading QImode using movzbl */
1063 {4, 4, 4}, /* cost of loading integer registers
1064 in QImode, HImode and SImode.
1065 Relative to reg-reg move (2). */
1066 {4, 4, 4}, /* cost of storing integer registers */
1067 4, /* cost of reg,reg fld/fst */
1068 {12, 12, 12}, /* cost of loading fp registers
1069 in SFmode, DFmode and XFmode */
1070 {6, 6, 8}, /* cost of storing fp registers
1071 in SFmode, DFmode and XFmode */
1072 2, /* cost of moving MMX register */
1073 {8, 8}, /* cost of loading MMX registers
1074 in SImode and DImode */
1075 {8, 8}, /* cost of storing MMX registers
1076 in SImode and DImode */
1077 2, /* cost of moving SSE register */
1078 {8, 8, 8}, /* cost of loading SSE registers
1079 in SImode, DImode and TImode */
1080 {8, 8, 8}, /* cost of storing SSE registers
1081 in SImode, DImode and TImode */
1082 5, /* MMX or SSE register to integer */
1083 32, /* size of l1 cache. */
1084 512, /* size of l2 cache. */
1085 64, /* size of prefetch block */
1086 6, /* number of parallel prefetches */
1087 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1088 is increased to perhaps more appropriate value of 5. */
1089 3, /* Branch cost */
1090 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1091 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1092 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1093 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1094 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1095 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1096 {DUMMY_STRINGOP_ALGS,
1097 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1098 {DUMMY_STRINGOP_ALGS,
1099 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1100 1, /* scalar_stmt_cost. */
1101 1, /* scalar load_cost. */
1102 1, /* scalar_store_cost. */
1103 1, /* vec_stmt_cost. */
1104 1, /* vec_to_scalar_cost. */
1105 1, /* scalar_to_vec_cost. */
1106 1, /* vec_align_load_cost. */
1107 2, /* vec_unalign_load_cost. */
1108 1, /* vec_store_cost. */
1109 3, /* cond_taken_branch_cost. */
1110 1, /* cond_not_taken_branch_cost. */
1113 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1115 struct processor_costs generic32_cost = {
1116 COSTS_N_INSNS (1), /* cost of an add instruction */
1117 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1118 COSTS_N_INSNS (1), /* variable shift costs */
1119 COSTS_N_INSNS (1), /* constant shift costs */
1120 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1121 COSTS_N_INSNS (4), /* HI */
1122 COSTS_N_INSNS (3), /* SI */
1123 COSTS_N_INSNS (4), /* DI */
1124 COSTS_N_INSNS (2)}, /* other */
1125 0, /* cost of multiply per each bit set */
1126 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1127 COSTS_N_INSNS (26), /* HI */
1128 COSTS_N_INSNS (42), /* SI */
1129 COSTS_N_INSNS (74), /* DI */
1130 COSTS_N_INSNS (74)}, /* other */
1131 COSTS_N_INSNS (1), /* cost of movsx */
1132 COSTS_N_INSNS (1), /* cost of movzx */
1133 8, /* "large" insn */
1134 17, /* MOVE_RATIO */
1135 4, /* cost for loading QImode using movzbl */
1136 {4, 4, 4}, /* cost of loading integer registers
1137 in QImode, HImode and SImode.
1138 Relative to reg-reg move (2). */
1139 {4, 4, 4}, /* cost of storing integer registers */
1140 4, /* cost of reg,reg fld/fst */
1141 {12, 12, 12}, /* cost of loading fp registers
1142 in SFmode, DFmode and XFmode */
1143 {6, 6, 8}, /* cost of storing fp registers
1144 in SFmode, DFmode and XFmode */
1145 2, /* cost of moving MMX register */
1146 {8, 8}, /* cost of loading MMX registers
1147 in SImode and DImode */
1148 {8, 8}, /* cost of storing MMX registers
1149 in SImode and DImode */
1150 2, /* cost of moving SSE register */
1151 {8, 8, 8}, /* cost of loading SSE registers
1152 in SImode, DImode and TImode */
1153 {8, 8, 8}, /* cost of storing SSE registers
1154 in SImode, DImode and TImode */
1155 5, /* MMX or SSE register to integer */
1156 32, /* size of l1 cache. */
1157 256, /* size of l2 cache. */
1158 64, /* size of prefetch block */
1159 6, /* number of parallel prefetches */
1160 3, /* Branch cost */
1161 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1162 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1163 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1164 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1165 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1166 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1167 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1168 DUMMY_STRINGOP_ALGS},
1169 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1170 DUMMY_STRINGOP_ALGS},
1171 1, /* scalar_stmt_cost. */
1172 1, /* scalar load_cost. */
1173 1, /* scalar_store_cost. */
1174 1, /* vec_stmt_cost. */
1175 1, /* vec_to_scalar_cost. */
1176 1, /* scalar_to_vec_cost. */
1177 1, /* vec_align_load_cost. */
1178 2, /* vec_unalign_load_cost. */
1179 1, /* vec_store_cost. */
1180 3, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1184 const struct processor_costs *ix86_cost = &pentium_cost;
1186 /* Processor feature/optimization bitmasks. */
1187 #define m_386 (1<<PROCESSOR_I386)
1188 #define m_486 (1<<PROCESSOR_I486)
1189 #define m_PENT (1<<PROCESSOR_PENTIUM)
1190 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1191 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1192 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1193 #define m_CORE2 (1<<PROCESSOR_CORE2)
1195 #define m_GEODE (1<<PROCESSOR_GEODE)
1196 #define m_K6 (1<<PROCESSOR_K6)
1197 #define m_K6_GEODE (m_K6 | m_GEODE)
1198 #define m_K8 (1<<PROCESSOR_K8)
1199 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1200 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1201 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1202 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1204 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1205 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1207 /* Generic instruction choice should be common subset of supported CPUs
1208 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1209 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1211 /* Feature tests against the various tunings. */
1212 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1213 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1214 negatively, so enabling for Generic64 seems like good code size
1215 tradeoff. We can't enable it for 32bit generic because it does not
1216 work well with PPro base chips. */
1217 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1219 /* X86_TUNE_PUSH_MEMORY */
1220 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC,
1223 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1226 /* X86_TUNE_USE_BIT_TEST */
1229 /* X86_TUNE_UNROLL_STRLEN */
1230 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1232 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1233 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1235 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1236 on simulation result. But after P4 was made, no performance benefit
1237 was observed with branch hints. It also increases the code size.
1238 As a result, icc never generates branch hints. */
1241 /* X86_TUNE_DOUBLE_WITH_ADD */
1244 /* X86_TUNE_USE_SAHF */
1245 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1246 | m_NOCONA | m_CORE2 | m_GENERIC,
1248 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1249 partial dependencies. */
1250 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1251 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1253 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1254 register stalls on Generic32 compilation setting as well. However
1255 in current implementation the partial register stalls are not eliminated
1256 very well - they can be introduced via subregs synthesized by combine
1257 and can happen in caller/callee saving sequences. Because this option
1258 pays back little on PPro based chips and is in conflict with partial reg
1259 dependencies used by Athlon/P4 based chips, it is better to leave it off
1260 for generic32 for now. */
1263 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1264 m_CORE2 | m_GENERIC,
1266 /* X86_TUNE_USE_HIMODE_FIOP */
1267 m_386 | m_486 | m_K6_GEODE,
1269 /* X86_TUNE_USE_SIMODE_FIOP */
1270 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1272 /* X86_TUNE_USE_MOV0 */
1275 /* X86_TUNE_USE_CLTD */
1276 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1278 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1281 /* X86_TUNE_SPLIT_LONG_MOVES */
1284 /* X86_TUNE_READ_MODIFY_WRITE */
1287 /* X86_TUNE_READ_MODIFY */
1290 /* X86_TUNE_PROMOTE_QIMODE */
1291 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1292 | m_GENERIC /* | m_PENT4 ? */,
1294 /* X86_TUNE_FAST_PREFIX */
1295 ~(m_PENT | m_486 | m_386),
1297 /* X86_TUNE_SINGLE_STRINGOP */
1298 m_386 | m_PENT4 | m_NOCONA,
1300 /* X86_TUNE_QIMODE_MATH */
1303 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1304 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1305 might be considered for Generic32 if our scheme for avoiding partial
1306 stalls was more effective. */
1309 /* X86_TUNE_PROMOTE_QI_REGS */
1312 /* X86_TUNE_PROMOTE_HI_REGS */
1315 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1316 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1318 /* X86_TUNE_ADD_ESP_8 */
1319 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1320 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1322 /* X86_TUNE_SUB_ESP_4 */
1323 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325 /* X86_TUNE_SUB_ESP_8 */
1326 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1327 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1329 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1330 for DFmode copies */
1331 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1332 | m_GENERIC | m_GEODE),
1334 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1335 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1337 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1338 conflict here in between PPro/Pentium4 based chips that thread 128bit
1339 SSE registers as single units versus K8 based chips that divide SSE
1340 registers to two 64bit halves. This knob promotes all store destinations
1341 to be 128bit to allow register renaming on 128bit SSE units, but usually
1342 results in one extra microop on 64bit SSE units. Experimental results
1343 shows that disabling this option on P4 brings over 20% SPECfp regression,
1344 while enabling it on K8 brings roughly 2.4% regression that can be partly
1345 masked by careful scheduling of moves. */
1346 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1348 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1351 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1352 are resolved on SSE register parts instead of whole registers, so we may
1353 maintain just lower part of scalar values in proper format leaving the
1354 upper part undefined. */
1357 /* X86_TUNE_SSE_TYPELESS_STORES */
1360 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1361 m_PPRO | m_PENT4 | m_NOCONA,
1363 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1364 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1366 /* X86_TUNE_PROLOGUE_USING_MOVE */
1367 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1369 /* X86_TUNE_EPILOGUE_USING_MOVE */
1370 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372 /* X86_TUNE_SHIFT1 */
1375 /* X86_TUNE_USE_FFREEP */
1378 /* X86_TUNE_INTER_UNIT_MOVES */
1379 ~(m_AMD_MULTIPLE | m_GENERIC),
1381 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1384 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1385 than 4 branch instructions in the 16 byte window. */
1386 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1388 /* X86_TUNE_SCHEDULE */
1389 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1391 /* X86_TUNE_USE_BT */
1394 /* X86_TUNE_USE_INCDEC */
1395 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1397 /* X86_TUNE_PAD_RETURNS */
1398 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_EXT_80387_CONSTANTS */
1401 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1403 /* X86_TUNE_SHORTEN_X87_SSE */
1406 /* X86_TUNE_AVOID_VECTOR_DECODE */
1409 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1410 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1413 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1414 vector path on AMD machines. */
1415 m_K8 | m_GENERIC64 | m_AMDFAM10,
1417 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1421 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1425 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1426 but one byte longer. */
1429 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1430 operand that cannot be represented using a modRM byte. The XOR
1431 replacement is long decoded, so this split helps here as well. */
1434 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1435 from integer to FP. */
1439 /* Feature tests against the various architecture variations. */
1440 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1441 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1442 ~(m_386 | m_486 | m_PENT | m_K6),
1444 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1447 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1450 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1453 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1457 static const unsigned int x86_accumulate_outgoing_args
1458 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1460 static const unsigned int x86_arch_always_fancy_math_387
1461 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1462 | m_NOCONA | m_CORE2 | m_GENERIC;
1464 static enum stringop_alg stringop_alg = no_stringop;
1466 /* In case the average insn count for single function invocation is
1467 lower than this constant, emit fast (but longer) prologue and
1469 #define FAST_PROLOGUE_INSN_COUNT 20
1471 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1472 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1473 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1474 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1476 /* Array of the smallest class containing reg number REGNO, indexed by
1477 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1479 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1481 /* ax, dx, cx, bx */
1482 AREG, DREG, CREG, BREG,
1483 /* si, di, bp, sp */
1484 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1486 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1487 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1490 /* flags, fpsr, fpcr, frame */
1491 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1493 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1496 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1499 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 /* SSE REX registers */
1502 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1506 /* The "default" register map used in 32bit mode. */
1508 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1510 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1511 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1512 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1513 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1514 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1515 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1519 static int const x86_64_int_parameter_registers[6] =
1521 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1522 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1525 static int const x86_64_ms_abi_int_parameter_registers[4] =
1527 2 /*RCX*/, 1 /*RDX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_int_return_registers[4] =
1533 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1536 /* The "default" register map used in 64bit mode. */
1537 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1540 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1541 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1542 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1543 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1544 8,9,10,11,12,13,14,15, /* extended integer registers */
1545 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1548 /* Define the register numbers to be used in Dwarf debugging information.
1549 The SVR4 reference port C compiler uses the following register numbers
1550 in its Dwarf output code:
1551 0 for %eax (gcc regno = 0)
1552 1 for %ecx (gcc regno = 2)
1553 2 for %edx (gcc regno = 1)
1554 3 for %ebx (gcc regno = 3)
1555 4 for %esp (gcc regno = 7)
1556 5 for %ebp (gcc regno = 6)
1557 6 for %esi (gcc regno = 4)
1558 7 for %edi (gcc regno = 5)
1559 The following three DWARF register numbers are never generated by
1560 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1561 believes these numbers have these meanings.
1562 8 for %eip (no gcc equivalent)
1563 9 for %eflags (gcc regno = 17)
1564 10 for %trapno (no gcc equivalent)
1565 It is not at all clear how we should number the FP stack registers
1566 for the x86 architecture. If the version of SDB on x86/svr4 were
1567 a bit less brain dead with respect to floating-point then we would
1568 have a precedent to follow with respect to DWARF register numbers
1569 for x86 FP registers, but the SDB on x86/svr4 is so completely
1570 broken with respect to FP registers that it is hardly worth thinking
1571 of it as something to strive for compatibility with.
1572 The version of x86/svr4 SDB I have at the moment does (partially)
1573 seem to believe that DWARF register number 11 is associated with
1574 the x86 register %st(0), but that's about all. Higher DWARF
1575 register numbers don't seem to be associated with anything in
1576 particular, and even for DWARF regno 11, SDB only seems to under-
1577 stand that it should say that a variable lives in %st(0) (when
1578 asked via an `=' command) if we said it was in DWARF regno 11,
1579 but SDB still prints garbage when asked for the value of the
1580 variable in question (via a `/' command).
1581 (Also note that the labels SDB prints for various FP stack regs
1582 when doing an `x' command are all wrong.)
1583 Note that these problems generally don't affect the native SVR4
1584 C compiler because it doesn't allow the use of -O with -g and
1585 because when it is *not* optimizing, it allocates a memory
1586 location for each floating-point variable, and the memory
1587 location is what gets described in the DWARF AT_location
1588 attribute for the variable in question.
1589 Regardless of the severe mental illness of the x86/svr4 SDB, we
1590 do something sensible here and we use the following DWARF
1591 register numbers. Note that these are all stack-top-relative
1593 11 for %st(0) (gcc regno = 8)
1594 12 for %st(1) (gcc regno = 9)
1595 13 for %st(2) (gcc regno = 10)
1596 14 for %st(3) (gcc regno = 11)
1597 15 for %st(4) (gcc regno = 12)
1598 16 for %st(5) (gcc regno = 13)
1599 17 for %st(6) (gcc regno = 14)
1600 18 for %st(7) (gcc regno = 15)
1602 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1605 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1606 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1607 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1608 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1609 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1613 /* Test and compare insns in i386.md store the information needed to
1614 generate branch and scc insns here. */
1616 rtx ix86_compare_op0 = NULL_RTX;
1617 rtx ix86_compare_op1 = NULL_RTX;
1618 rtx ix86_compare_emitted = NULL_RTX;
1620 /* Size of the register save area. */
1621 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1623 /* Define the structure for the machine field in struct function. */
1625 struct stack_local_entry GTY(())
1627 unsigned short mode;
1630 struct stack_local_entry *next;
1633 /* Structure describing stack frame layout.
1634 Stack grows downward:
1640 saved frame pointer if frame_pointer_needed
1641 <- HARD_FRAME_POINTER
1646 [va_arg registers] (
1647 > to_allocate <- FRAME_POINTER
1657 HOST_WIDE_INT frame;
1659 int outgoing_arguments_size;
1662 HOST_WIDE_INT to_allocate;
1663 /* The offsets relative to ARG_POINTER. */
1664 HOST_WIDE_INT frame_pointer_offset;
1665 HOST_WIDE_INT hard_frame_pointer_offset;
1666 HOST_WIDE_INT stack_pointer_offset;
1668 /* When save_regs_using_mov is set, emit prologue using
1669 move instead of push instructions. */
1670 bool save_regs_using_mov;
1673 /* Code model option. */
1674 enum cmodel ix86_cmodel;
1676 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1678 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1680 /* Which unit we are generating floating point math for. */
1681 enum fpmath_unit ix86_fpmath;
1683 /* Which cpu are we scheduling for. */
1684 enum processor_type ix86_tune;
1686 /* Which instruction set architecture to use. */
1687 enum processor_type ix86_arch;
1689 /* true if sse prefetch instruction is not NOOP. */
1690 int x86_prefetch_sse;
1692 /* ix86_regparm_string as a number */
1693 static int ix86_regparm;
1695 /* -mstackrealign option */
1696 extern int ix86_force_align_arg_pointer;
1697 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1699 /* Preferred alignment for stack boundary in bits. */
1700 unsigned int ix86_preferred_stack_boundary;
1702 /* Values 1-5: see jump.c */
1703 int ix86_branch_cost;
1705 /* Variables which are this size or smaller are put in the data/bss
1706 or ldata/lbss sections. */
1708 int ix86_section_threshold = 65536;
1710 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1711 char internal_label_prefix[16];
1712 int internal_label_prefix_len;
1714 /* Fence to use after loop using movnt. */
1717 /* Register class used for passing given 64bit part of the argument.
1718 These represent classes as documented by the PS ABI, with the exception
1719 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1720 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1722 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1723 whenever possible (upper half does contain padding). */
1724 enum x86_64_reg_class
1727 X86_64_INTEGER_CLASS,
1728 X86_64_INTEGERSI_CLASS,
1735 X86_64_COMPLEX_X87_CLASS,
1738 static const char * const x86_64_reg_class_name[] =
1740 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1741 "sseup", "x87", "x87up", "cplx87", "no"
1744 #define MAX_CLASSES 4
1746 /* Table of constants used by fldpi, fldln2, etc.... */
1747 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1748 static bool ext_80387_constants_init = 0;
1751 static struct machine_function * ix86_init_machine_status (void);
1752 static rtx ix86_function_value (const_tree, const_tree, bool);
1753 static int ix86_function_regparm (const_tree, const_tree);
1754 static void ix86_compute_frame_layout (struct ix86_frame *);
1755 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1759 /* The svr4 ABI for the i386 says that records and unions are returned
1761 #ifndef DEFAULT_PCC_STRUCT_RETURN
1762 #define DEFAULT_PCC_STRUCT_RETURN 1
1765 /* Bit flags that specify the ISA we are compiling for. */
1766 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1768 /* A mask of ix86_isa_flags that includes bit X if X
1769 was set or cleared on the command line. */
1770 static int ix86_isa_flags_explicit;
1772 /* Define a set of ISAs which are available when a given ISA is
1773 enabled. MMX and SSE ISAs are handled separately. */
1775 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1776 #define OPTION_MASK_ISA_3DNOW_SET \
1777 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1779 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1780 #define OPTION_MASK_ISA_SSE2_SET \
1781 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1782 #define OPTION_MASK_ISA_SSE3_SET \
1783 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1784 #define OPTION_MASK_ISA_SSSE3_SET \
1785 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1786 #define OPTION_MASK_ISA_SSE4_1_SET \
1787 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1788 #define OPTION_MASK_ISA_SSE4_2_SET \
1789 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1791 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1793 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1795 #define OPTION_MASK_ISA_SSE4A_SET \
1796 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1797 #define OPTION_MASK_ISA_SSE5_SET \
1798 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1800 /* Define a set of ISAs which aren't available when a given ISA is
1801 disabled. MMX and SSE ISAs are handled separately. */
1803 #define OPTION_MASK_ISA_MMX_UNSET \
1804 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1805 #define OPTION_MASK_ISA_3DNOW_UNSET \
1806 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1807 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1809 #define OPTION_MASK_ISA_SSE_UNSET \
1810 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1811 #define OPTION_MASK_ISA_SSE2_UNSET \
1812 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1813 #define OPTION_MASK_ISA_SSE3_UNSET \
1814 (OPTION_MASK_ISA_SSE3 \
1815 | OPTION_MASK_ISA_SSSE3_UNSET \
1816 | OPTION_MASK_ISA_SSE4A_UNSET )
1817 #define OPTION_MASK_ISA_SSSE3_UNSET \
1818 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1819 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1820 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1821 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1823 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
1825 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1827 #define OPTION_MASK_ISA_SSE4A_UNSET \
1828 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1830 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1832 /* Vectorization library interface and handlers. */
1833 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1834 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1835 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1837 /* Implement TARGET_HANDLE_OPTION. */
1840 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1847 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1848 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1852 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1853 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1860 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1861 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1865 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1866 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1876 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1877 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1881 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1882 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1889 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1890 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1894 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1895 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1902 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1903 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1907 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1908 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1915 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1916 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1920 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1921 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1928 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1929 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1933 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1934 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1941 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1942 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1946 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1947 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1952 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1953 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1957 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1958 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1964 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1965 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1969 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1970 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1977 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1978 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1982 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1983 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1992 /* Sometimes certain combinations of command options do not make
1993 sense on a particular target machine. You can define a macro
1994 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1995 defined, is executed once just after all the command options have
1998 Don't use this macro to turn on various extra optimizations for
1999 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2002 override_options (void)
2005 int ix86_tune_defaulted = 0;
2006 int ix86_arch_specified = 0;
2007 unsigned int ix86_arch_mask, ix86_tune_mask;
2009 /* Comes from final.c -- no real reason to change it. */
2010 #define MAX_CODE_ALIGN 16
2014 const struct processor_costs *cost; /* Processor costs */
2015 const int align_loop; /* Default alignments. */
2016 const int align_loop_max_skip;
2017 const int align_jump;
2018 const int align_jump_max_skip;
2019 const int align_func;
2021 const processor_target_table[PROCESSOR_max] =
2023 {&i386_cost, 4, 3, 4, 3, 4},
2024 {&i486_cost, 16, 15, 16, 15, 16},
2025 {&pentium_cost, 16, 7, 16, 7, 16},
2026 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2027 {&geode_cost, 0, 0, 0, 0, 0},
2028 {&k6_cost, 32, 7, 32, 7, 32},
2029 {&athlon_cost, 16, 7, 16, 7, 16},
2030 {&pentium4_cost, 0, 0, 0, 0, 0},
2031 {&k8_cost, 16, 7, 16, 7, 16},
2032 {&nocona_cost, 0, 0, 0, 0, 0},
2033 {&core2_cost, 16, 10, 16, 10, 16},
2034 {&generic32_cost, 16, 7, 16, 7, 16},
2035 {&generic64_cost, 16, 10, 16, 10, 16},
2036 {&amdfam10_cost, 32, 24, 32, 7, 32}
2039 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2070 PTA_PREFETCH_SSE = 1 << 4,
2072 PTA_3DNOW_A = 1 << 6,
2076 PTA_POPCNT = 1 << 10,
2078 PTA_SSE4A = 1 << 12,
2079 PTA_NO_SAHF = 1 << 13,
2080 PTA_SSE4_1 = 1 << 14,
2081 PTA_SSE4_2 = 1 << 15,
2084 PTA_PCLMUL = 1 << 18
2089 const char *const name; /* processor name or nickname. */
2090 const enum processor_type processor;
2091 const unsigned /*enum pta_flags*/ flags;
2093 const processor_alias_table[] =
2095 {"i386", PROCESSOR_I386, 0},
2096 {"i486", PROCESSOR_I486, 0},
2097 {"i586", PROCESSOR_PENTIUM, 0},
2098 {"pentium", PROCESSOR_PENTIUM, 0},
2099 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2100 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2101 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2102 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2104 {"i686", PROCESSOR_PENTIUMPRO, 0},
2105 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2107 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2108 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2110 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2111 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2112 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2113 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2114 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2115 | PTA_CX16 | PTA_NO_SAHF)},
2116 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2117 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2120 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2121 |PTA_PREFETCH_SSE)},
2122 {"k6", PROCESSOR_K6, PTA_MMX},
2123 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2124 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2126 | PTA_PREFETCH_SSE)},
2127 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2128 | PTA_PREFETCH_SSE)},
2129 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2131 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2133 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2135 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2136 | PTA_MMX | PTA_SSE | PTA_SSE2
2138 {"k8", PROCESSOR_K8, (PTA_64BIT
2139 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2140 | PTA_SSE | PTA_SSE2
2142 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2143 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2146 {"opteron", PROCESSOR_K8, (PTA_64BIT
2147 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2148 | PTA_SSE | PTA_SSE2
2150 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2151 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2152 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2154 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2155 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2156 | PTA_SSE | PTA_SSE2
2158 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2159 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2160 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2162 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2163 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2164 | PTA_SSE | PTA_SSE2
2166 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2167 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2168 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2170 | PTA_CX16 | PTA_ABM)},
2171 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2172 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2173 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2175 | PTA_CX16 | PTA_ABM)},
2176 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2177 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2180 int const pta_size = ARRAY_SIZE (processor_alias_table);
2182 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2183 SUBTARGET_OVERRIDE_OPTIONS;
2186 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2187 SUBSUBTARGET_OVERRIDE_OPTIONS;
2190 /* -fPIC is the default for x86_64. */
2191 if (TARGET_MACHO && TARGET_64BIT)
2194 /* Set the default values for switches whose default depends on TARGET_64BIT
2195 in case they weren't overwritten by command line options. */
2198 /* Mach-O doesn't support omitting the frame pointer for now. */
2199 if (flag_omit_frame_pointer == 2)
2200 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2201 if (flag_asynchronous_unwind_tables == 2)
2202 flag_asynchronous_unwind_tables = 1;
2203 if (flag_pcc_struct_return == 2)
2204 flag_pcc_struct_return = 0;
2208 if (flag_omit_frame_pointer == 2)
2209 flag_omit_frame_pointer = 0;
2210 if (flag_asynchronous_unwind_tables == 2)
2211 flag_asynchronous_unwind_tables = 0;
2212 if (flag_pcc_struct_return == 2)
2213 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2216 /* Need to check -mtune=generic first. */
2217 if (ix86_tune_string)
2219 if (!strcmp (ix86_tune_string, "generic")
2220 || !strcmp (ix86_tune_string, "i686")
2221 /* As special support for cross compilers we read -mtune=native
2222 as -mtune=generic. With native compilers we won't see the
2223 -mtune=native, as it was changed by the driver. */
2224 || !strcmp (ix86_tune_string, "native"))
2227 ix86_tune_string = "generic64";
2229 ix86_tune_string = "generic32";
2231 else if (!strncmp (ix86_tune_string, "generic", 7))
2232 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2236 if (ix86_arch_string)
2237 ix86_tune_string = ix86_arch_string;
2238 if (!ix86_tune_string)
2240 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2241 ix86_tune_defaulted = 1;
2244 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2245 need to use a sensible tune option. */
2246 if (!strcmp (ix86_tune_string, "generic")
2247 || !strcmp (ix86_tune_string, "x86-64")
2248 || !strcmp (ix86_tune_string, "i686"))
2251 ix86_tune_string = "generic64";
2253 ix86_tune_string = "generic32";
2256 if (ix86_stringop_string)
2258 if (!strcmp (ix86_stringop_string, "rep_byte"))
2259 stringop_alg = rep_prefix_1_byte;
2260 else if (!strcmp (ix86_stringop_string, "libcall"))
2261 stringop_alg = libcall;
2262 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2263 stringop_alg = rep_prefix_4_byte;
2264 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2265 stringop_alg = rep_prefix_8_byte;
2266 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2267 stringop_alg = loop_1_byte;
2268 else if (!strcmp (ix86_stringop_string, "loop"))
2269 stringop_alg = loop;
2270 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2271 stringop_alg = unrolled_loop;
2273 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2275 if (!strcmp (ix86_tune_string, "x86-64"))
2276 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2277 "-mtune=generic instead as appropriate.");
2279 if (!ix86_arch_string)
2280 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2282 ix86_arch_specified = 1;
2284 if (!strcmp (ix86_arch_string, "generic"))
2285 error ("generic CPU can be used only for -mtune= switch");
2286 if (!strncmp (ix86_arch_string, "generic", 7))
2287 error ("bad value (%s) for -march= switch", ix86_arch_string);
2289 if (ix86_cmodel_string != 0)
2291 if (!strcmp (ix86_cmodel_string, "small"))
2292 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2293 else if (!strcmp (ix86_cmodel_string, "medium"))
2294 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2295 else if (!strcmp (ix86_cmodel_string, "large"))
2296 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2298 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2299 else if (!strcmp (ix86_cmodel_string, "32"))
2300 ix86_cmodel = CM_32;
2301 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2302 ix86_cmodel = CM_KERNEL;
2304 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2308 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2309 use of rip-relative addressing. This eliminates fixups that
2310 would otherwise be needed if this object is to be placed in a
2311 DLL, and is essentially just as efficient as direct addressing. */
2312 if (TARGET_64BIT_MS_ABI)
2313 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2314 else if (TARGET_64BIT)
2315 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2317 ix86_cmodel = CM_32;
2319 if (ix86_asm_string != 0)
2322 && !strcmp (ix86_asm_string, "intel"))
2323 ix86_asm_dialect = ASM_INTEL;
2324 else if (!strcmp (ix86_asm_string, "att"))
2325 ix86_asm_dialect = ASM_ATT;
2327 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2329 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2330 error ("code model %qs not supported in the %s bit mode",
2331 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2332 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2333 sorry ("%i-bit mode not compiled in",
2334 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2336 for (i = 0; i < pta_size; i++)
2337 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2339 ix86_arch = processor_alias_table[i].processor;
2340 /* Default cpu tuning to the architecture. */
2341 ix86_tune = ix86_arch;
2343 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2344 error ("CPU you selected does not support x86-64 "
2347 if (processor_alias_table[i].flags & PTA_MMX
2348 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2349 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2350 if (processor_alias_table[i].flags & PTA_3DNOW
2351 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2352 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2353 if (processor_alias_table[i].flags & PTA_3DNOW_A
2354 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2355 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2356 if (processor_alias_table[i].flags & PTA_SSE
2357 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2358 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2359 if (processor_alias_table[i].flags & PTA_SSE2
2360 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2361 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2362 if (processor_alias_table[i].flags & PTA_SSE3
2363 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2364 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2365 if (processor_alias_table[i].flags & PTA_SSSE3
2366 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2367 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2368 if (processor_alias_table[i].flags & PTA_SSE4_1
2369 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2370 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2371 if (processor_alias_table[i].flags & PTA_SSE4_2
2372 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2373 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2374 if (processor_alias_table[i].flags & PTA_SSE4A
2375 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2376 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2377 if (processor_alias_table[i].flags & PTA_SSE5
2378 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2379 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2381 if (processor_alias_table[i].flags & PTA_ABM)
2383 if (processor_alias_table[i].flags & PTA_CX16)
2384 x86_cmpxchg16b = true;
2385 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2387 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2388 x86_prefetch_sse = true;
2389 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2391 if (processor_alias_table[i].flags & PTA_AES)
2393 if (processor_alias_table[i].flags & PTA_PCLMUL)
2400 error ("bad value (%s) for -march= switch", ix86_arch_string);
2402 ix86_arch_mask = 1u << ix86_arch;
2403 for (i = 0; i < X86_ARCH_LAST; ++i)
2404 ix86_arch_features[i] &= ix86_arch_mask;
2406 for (i = 0; i < pta_size; i++)
2407 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2409 ix86_tune = processor_alias_table[i].processor;
2410 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2412 if (ix86_tune_defaulted)
2414 ix86_tune_string = "x86-64";
2415 for (i = 0; i < pta_size; i++)
2416 if (! strcmp (ix86_tune_string,
2417 processor_alias_table[i].name))
2419 ix86_tune = processor_alias_table[i].processor;
2422 error ("CPU you selected does not support x86-64 "
2425 /* Intel CPUs have always interpreted SSE prefetch instructions as
2426 NOPs; so, we can enable SSE prefetch instructions even when
2427 -mtune (rather than -march) points us to a processor that has them.
2428 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2429 higher processors. */
2431 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2432 x86_prefetch_sse = true;
2436 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2438 /* Enable SSE2 if AES or PCLMUL is enabled. */
2439 if ((x86_aes || x86_pclmul)
2440 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2442 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2443 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2446 ix86_tune_mask = 1u << ix86_tune;
2447 for (i = 0; i < X86_TUNE_LAST; ++i)
2448 ix86_tune_features[i] &= ix86_tune_mask;
2451 ix86_cost = &size_cost;
2453 ix86_cost = processor_target_table[ix86_tune].cost;
2455 /* Arrange to set up i386_stack_locals for all functions. */
2456 init_machine_status = ix86_init_machine_status;
2458 /* Validate -mregparm= value. */
2459 if (ix86_regparm_string)
2462 warning (0, "-mregparm is ignored in 64-bit mode");
2463 i = atoi (ix86_regparm_string);
2464 if (i < 0 || i > REGPARM_MAX)
2465 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2470 ix86_regparm = REGPARM_MAX;
2472 /* If the user has provided any of the -malign-* options,
2473 warn and use that value only if -falign-* is not set.
2474 Remove this code in GCC 3.2 or later. */
2475 if (ix86_align_loops_string)
2477 warning (0, "-malign-loops is obsolete, use -falign-loops");
2478 if (align_loops == 0)
2480 i = atoi (ix86_align_loops_string);
2481 if (i < 0 || i > MAX_CODE_ALIGN)
2482 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2484 align_loops = 1 << i;
2488 if (ix86_align_jumps_string)
2490 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2491 if (align_jumps == 0)
2493 i = atoi (ix86_align_jumps_string);
2494 if (i < 0 || i > MAX_CODE_ALIGN)
2495 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2497 align_jumps = 1 << i;
2501 if (ix86_align_funcs_string)
2503 warning (0, "-malign-functions is obsolete, use -falign-functions");
2504 if (align_functions == 0)
2506 i = atoi (ix86_align_funcs_string);
2507 if (i < 0 || i > MAX_CODE_ALIGN)
2508 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2510 align_functions = 1 << i;
2514 /* Default align_* from the processor table. */
2515 if (align_loops == 0)
2517 align_loops = processor_target_table[ix86_tune].align_loop;
2518 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2520 if (align_jumps == 0)
2522 align_jumps = processor_target_table[ix86_tune].align_jump;
2523 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2525 if (align_functions == 0)
2527 align_functions = processor_target_table[ix86_tune].align_func;
2530 /* Validate -mbranch-cost= value, or provide default. */
2531 ix86_branch_cost = ix86_cost->branch_cost;
2532 if (ix86_branch_cost_string)
2534 i = atoi (ix86_branch_cost_string);
2536 error ("-mbranch-cost=%d is not between 0 and 5", i);
2538 ix86_branch_cost = i;
2540 if (ix86_section_threshold_string)
2542 i = atoi (ix86_section_threshold_string);
2544 error ("-mlarge-data-threshold=%d is negative", i);
2546 ix86_section_threshold = i;
2549 if (ix86_tls_dialect_string)
2551 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2552 ix86_tls_dialect = TLS_DIALECT_GNU;
2553 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2554 ix86_tls_dialect = TLS_DIALECT_GNU2;
2555 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2556 ix86_tls_dialect = TLS_DIALECT_SUN;
2558 error ("bad value (%s) for -mtls-dialect= switch",
2559 ix86_tls_dialect_string);
2562 if (ix87_precision_string)
2564 i = atoi (ix87_precision_string);
2565 if (i != 32 && i != 64 && i != 80)
2566 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2571 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2573 /* Enable by default the SSE and MMX builtins. Do allow the user to
2574 explicitly disable any of these. In particular, disabling SSE and
2575 MMX for kernel code is extremely useful. */
2576 if (!ix86_arch_specified)
2578 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2579 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2582 warning (0, "-mrtd is ignored in 64bit mode");
2586 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2588 if (!ix86_arch_specified)
2590 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2592 /* i386 ABI does not specify red zone. It still makes sense to use it
2593 when programmer takes care to stack from being destroyed. */
2594 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2595 target_flags |= MASK_NO_RED_ZONE;
2598 /* Keep nonleaf frame pointers. */
2599 if (flag_omit_frame_pointer)
2600 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2601 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2602 flag_omit_frame_pointer = 1;
2604 /* If we're doing fast math, we don't care about comparison order
2605 wrt NaNs. This lets us use a shorter comparison sequence. */
2606 if (flag_finite_math_only)
2607 target_flags &= ~MASK_IEEE_FP;
2609 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2610 since the insns won't need emulation. */
2611 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2612 target_flags &= ~MASK_NO_FANCY_MATH_387;
2614 /* Likewise, if the target doesn't have a 387, or we've specified
2615 software floating point, don't use 387 inline intrinsics. */
2617 target_flags |= MASK_NO_FANCY_MATH_387;
2619 /* Turn on MMX builtins for -msse. */
2622 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2623 x86_prefetch_sse = true;
2626 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2627 if (TARGET_SSE4_2 || TARGET_ABM)
2630 /* Validate -mpreferred-stack-boundary= value, or provide default.
2631 The default of 128 bits is for Pentium III's SSE __m128. We can't
2632 change it because of optimize_size. Otherwise, we can't mix object
2633 files compiled with -Os and -On. */
2634 ix86_preferred_stack_boundary = 128;
2635 if (ix86_preferred_stack_boundary_string)
2637 i = atoi (ix86_preferred_stack_boundary_string);
2638 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2639 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2640 TARGET_64BIT ? 4 : 2);
2642 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2645 /* Accept -msseregparm only if at least SSE support is enabled. */
2646 if (TARGET_SSEREGPARM
2648 error ("-msseregparm used without SSE enabled");
2650 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2651 if (ix86_fpmath_string != 0)
2653 if (! strcmp (ix86_fpmath_string, "387"))
2654 ix86_fpmath = FPMATH_387;
2655 else if (! strcmp (ix86_fpmath_string, "sse"))
2659 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2660 ix86_fpmath = FPMATH_387;
2663 ix86_fpmath = FPMATH_SSE;
2665 else if (! strcmp (ix86_fpmath_string, "387,sse")
2666 || ! strcmp (ix86_fpmath_string, "sse,387"))
2670 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2671 ix86_fpmath = FPMATH_387;
2673 else if (!TARGET_80387)
2675 warning (0, "387 instruction set disabled, using SSE arithmetics");
2676 ix86_fpmath = FPMATH_SSE;
2679 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2682 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2685 /* If the i387 is disabled, then do not return values in it. */
2687 target_flags &= ~MASK_FLOAT_RETURNS;
2689 /* Use external vectorized library in vectorizing intrinsics. */
2690 if (ix86_veclibabi_string)
2692 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2693 ix86_veclib_handler = ix86_veclibabi_svml;
2694 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2695 ix86_veclib_handler = ix86_veclibabi_acml;
2697 error ("unknown vectorization library ABI type (%s) for "
2698 "-mveclibabi= switch", ix86_veclibabi_string);
2701 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2702 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2704 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2706 /* ??? Unwind info is not correct around the CFG unless either a frame
2707 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2708 unwind info generation to be aware of the CFG and propagating states
2710 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2711 || flag_exceptions || flag_non_call_exceptions)
2712 && flag_omit_frame_pointer
2713 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2715 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2716 warning (0, "unwind tables currently require either a frame pointer "
2717 "or -maccumulate-outgoing-args for correctness");
2718 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2721 /* If stack probes are required, the space used for large function
2722 arguments on the stack must also be probed, so enable
2723 -maccumulate-outgoing-args so this happens in the prologue. */
2724 if (TARGET_STACK_PROBE
2725 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2727 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2728 warning (0, "stack probing requires -maccumulate-outgoing-args "
2730 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2733 /* For sane SSE instruction set generation we need fcomi instruction.
2734 It is safe to enable all CMOVE instructions. */
2738 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2741 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2742 p = strchr (internal_label_prefix, 'X');
2743 internal_label_prefix_len = p - internal_label_prefix;
2747 /* When scheduling description is not available, disable scheduler pass
2748 so it won't slow down the compilation and make x87 code slower. */
2749 if (!TARGET_SCHEDULE)
2750 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2752 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2753 set_param_value ("simultaneous-prefetches",
2754 ix86_cost->simultaneous_prefetches);
2755 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2756 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2757 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2758 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2759 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2760 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2762 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2763 can be optimized to ap = __builtin_next_arg (0). */
2764 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
2765 targetm.expand_builtin_va_start = NULL;
2768 /* Return true if this goes in large data/bss. */
2771 ix86_in_large_data_p (tree exp)
2773 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2776 /* Functions are never large data. */
2777 if (TREE_CODE (exp) == FUNCTION_DECL)
2780 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2782 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2783 if (strcmp (section, ".ldata") == 0
2784 || strcmp (section, ".lbss") == 0)
2790 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2792 /* If this is an incomplete type with size 0, then we can't put it
2793 in data because it might be too big when completed. */
2794 if (!size || size > ix86_section_threshold)
2801 /* Switch to the appropriate section for output of DECL.
2802 DECL is either a `VAR_DECL' node or a constant of some sort.
2803 RELOC indicates whether forming the initial value of DECL requires
2804 link-time relocations. */
2806 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2810 x86_64_elf_select_section (tree decl, int reloc,
2811 unsigned HOST_WIDE_INT align)
2813 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2814 && ix86_in_large_data_p (decl))
2816 const char *sname = NULL;
2817 unsigned int flags = SECTION_WRITE;
2818 switch (categorize_decl_for_section (decl, reloc))
2823 case SECCAT_DATA_REL:
2824 sname = ".ldata.rel";
2826 case SECCAT_DATA_REL_LOCAL:
2827 sname = ".ldata.rel.local";
2829 case SECCAT_DATA_REL_RO:
2830 sname = ".ldata.rel.ro";
2832 case SECCAT_DATA_REL_RO_LOCAL:
2833 sname = ".ldata.rel.ro.local";
2837 flags |= SECTION_BSS;
2840 case SECCAT_RODATA_MERGE_STR:
2841 case SECCAT_RODATA_MERGE_STR_INIT:
2842 case SECCAT_RODATA_MERGE_CONST:
2846 case SECCAT_SRODATA:
2853 /* We don't split these for medium model. Place them into
2854 default sections and hope for best. */
2859 /* We might get called with string constants, but get_named_section
2860 doesn't like them as they are not DECLs. Also, we need to set
2861 flags in that case. */
2863 return get_section (sname, flags, NULL);
2864 return get_named_section (decl, sname, reloc);
2867 return default_elf_select_section (decl, reloc, align);
2870 /* Build up a unique section name, expressed as a
2871 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2872 RELOC indicates whether the initial value of EXP requires
2873 link-time relocations. */
2875 static void ATTRIBUTE_UNUSED
2876 x86_64_elf_unique_section (tree decl, int reloc)
2878 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2879 && ix86_in_large_data_p (decl))
2881 const char *prefix = NULL;
2882 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2883 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2885 switch (categorize_decl_for_section (decl, reloc))
2888 case SECCAT_DATA_REL:
2889 case SECCAT_DATA_REL_LOCAL:
2890 case SECCAT_DATA_REL_RO:
2891 case SECCAT_DATA_REL_RO_LOCAL:
2892 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2895 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2898 case SECCAT_RODATA_MERGE_STR:
2899 case SECCAT_RODATA_MERGE_STR_INIT:
2900 case SECCAT_RODATA_MERGE_CONST:
2901 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2903 case SECCAT_SRODATA:
2910 /* We don't split these for medium model. Place them into
2911 default sections and hope for best. */
2919 plen = strlen (prefix);
2921 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2922 name = targetm.strip_name_encoding (name);
2923 nlen = strlen (name);
2925 string = (char *) alloca (nlen + plen + 1);
2926 memcpy (string, prefix, plen);
2927 memcpy (string + plen, name, nlen + 1);
2929 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2933 default_unique_section (decl, reloc);
2936 #ifdef COMMON_ASM_OP
2937 /* This says how to output assembler code to declare an
2938 uninitialized external linkage data object.
2940 For medium model x86-64 we need to use .largecomm opcode for
2943 x86_elf_aligned_common (FILE *file,
2944 const char *name, unsigned HOST_WIDE_INT size,
2947 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2948 && size > (unsigned int)ix86_section_threshold)
2949 fprintf (file, ".largecomm\t");
2951 fprintf (file, "%s", COMMON_ASM_OP);
2952 assemble_name (file, name);
2953 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2954 size, align / BITS_PER_UNIT);
2958 /* Utility function for targets to use in implementing
2959 ASM_OUTPUT_ALIGNED_BSS. */
2962 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2963 const char *name, unsigned HOST_WIDE_INT size,
2966 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2967 && size > (unsigned int)ix86_section_threshold)
2968 switch_to_section (get_named_section (decl, ".lbss", 0));
2970 switch_to_section (bss_section);
2971 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2972 #ifdef ASM_DECLARE_OBJECT_NAME
2973 last_assemble_variable_decl = decl;
2974 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2976 /* Standard thing is just output label for the object. */
2977 ASM_OUTPUT_LABEL (file, name);
2978 #endif /* ASM_DECLARE_OBJECT_NAME */
2979 ASM_OUTPUT_SKIP (file, size ? size : 1);
2983 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2985 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2986 make the problem with not enough registers even worse. */
2987 #ifdef INSN_SCHEDULING
2989 flag_schedule_insns = 0;
2993 /* The Darwin libraries never set errno, so we might as well
2994 avoid calling them when that's the only reason we would. */
2995 flag_errno_math = 0;
2997 /* The default values of these switches depend on the TARGET_64BIT
2998 that is not known at this moment. Mark these values with 2 and
2999 let user the to override these. In case there is no command line option
3000 specifying them, we will set the defaults in override_options. */
3002 flag_omit_frame_pointer = 2;
3003 flag_pcc_struct_return = 2;
3004 flag_asynchronous_unwind_tables = 2;
3005 flag_vect_cost_model = 1;
3006 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3007 SUBTARGET_OPTIMIZATION_OPTIONS;
3011 /* Decide whether we can make a sibling call to a function. DECL is the
3012 declaration of the function being targeted by the call and EXP is the
3013 CALL_EXPR representing the call. */
3016 ix86_function_ok_for_sibcall (tree decl, tree exp)
3021 /* If we are generating position-independent code, we cannot sibcall
3022 optimize any indirect call, or a direct call to a global function,
3023 as the PLT requires %ebx be live. */
3024 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
3031 func = TREE_TYPE (CALL_EXPR_FN (exp));
3032 if (POINTER_TYPE_P (func))
3033 func = TREE_TYPE (func);
3036 /* Check that the return value locations are the same. Like
3037 if we are returning floats on the 80387 register stack, we cannot
3038 make a sibcall from a function that doesn't return a float to a
3039 function that does or, conversely, from a function that does return
3040 a float to a function that doesn't; the necessary stack adjustment
3041 would not be executed. This is also the place we notice
3042 differences in the return value ABI. Note that it is ok for one
3043 of the functions to have void return type as long as the return
3044 value of the other is passed in a register. */
3045 a = ix86_function_value (TREE_TYPE (exp), func, false);
3046 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3048 if (STACK_REG_P (a) || STACK_REG_P (b))
3050 if (!rtx_equal_p (a, b))
3053 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3055 else if (!rtx_equal_p (a, b))
3058 /* If this call is indirect, we'll need to be able to use a call-clobbered
3059 register for the address of the target function. Make sure that all
3060 such registers are not used for passing parameters. */
3061 if (!decl && !TARGET_64BIT)
3065 /* We're looking at the CALL_EXPR, we need the type of the function. */
3066 type = CALL_EXPR_FN (exp); /* pointer expression */
3067 type = TREE_TYPE (type); /* pointer type */
3068 type = TREE_TYPE (type); /* function type */
3070 if (ix86_function_regparm (type, NULL) >= 3)
3072 /* ??? Need to count the actual number of registers to be used,
3073 not the possible number of registers. Fix later. */
3078 /* Dllimport'd functions are also called indirectly. */
3079 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3080 && decl && DECL_DLLIMPORT_P (decl)
3081 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3084 /* If we forced aligned the stack, then sibcalling would unalign the
3085 stack, which may break the called function. */
3086 if (cfun->machine->force_align_arg_pointer)
3089 /* Otherwise okay. That also includes certain types of indirect calls. */
3093 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3094 calling convention attributes;
3095 arguments as in struct attribute_spec.handler. */
3098 ix86_handle_cconv_attribute (tree *node, tree name,
3100 int flags ATTRIBUTE_UNUSED,
3103 if (TREE_CODE (*node) != FUNCTION_TYPE
3104 && TREE_CODE (*node) != METHOD_TYPE
3105 && TREE_CODE (*node) != FIELD_DECL
3106 && TREE_CODE (*node) != TYPE_DECL)
3108 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3109 IDENTIFIER_POINTER (name));
3110 *no_add_attrs = true;
3114 /* Can combine regparm with all attributes but fastcall. */
3115 if (is_attribute_p ("regparm", name))
3119 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3121 error ("fastcall and regparm attributes are not compatible");
3124 cst = TREE_VALUE (args);
3125 if (TREE_CODE (cst) != INTEGER_CST)
3127 warning (OPT_Wattributes,
3128 "%qs attribute requires an integer constant argument",
3129 IDENTIFIER_POINTER (name));
3130 *no_add_attrs = true;
3132 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3134 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3135 IDENTIFIER_POINTER (name), REGPARM_MAX);
3136 *no_add_attrs = true;
3140 && lookup_attribute (ix86_force_align_arg_pointer_string,
3141 TYPE_ATTRIBUTES (*node))
3142 && compare_tree_int (cst, REGPARM_MAX-1))
3144 error ("%s functions limited to %d register parameters",
3145 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3153 /* Do not warn when emulating the MS ABI. */
3154 if (!TARGET_64BIT_MS_ABI)
3155 warning (OPT_Wattributes, "%qs attribute ignored",
3156 IDENTIFIER_POINTER (name));
3157 *no_add_attrs = true;
3161 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3162 if (is_attribute_p ("fastcall", name))
3164 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3166 error ("fastcall and cdecl attributes are not compatible");
3168 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3170 error ("fastcall and stdcall attributes are not compatible");
3172 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3174 error ("fastcall and regparm attributes are not compatible");
3178 /* Can combine stdcall with fastcall (redundant), regparm and
3180 else if (is_attribute_p ("stdcall", name))
3182 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3184 error ("stdcall and cdecl attributes are not compatible");
3186 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3188 error ("stdcall and fastcall attributes are not compatible");
3192 /* Can combine cdecl with regparm and sseregparm. */
3193 else if (is_attribute_p ("cdecl", name))
3195 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3197 error ("stdcall and cdecl attributes are not compatible");
3199 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3201 error ("fastcall and cdecl attributes are not compatible");
3205 /* Can combine sseregparm with all attributes. */
3210 /* Return 0 if the attributes for two types are incompatible, 1 if they
3211 are compatible, and 2 if they are nearly compatible (which causes a
3212 warning to be generated). */
3215 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3217 /* Check for mismatch of non-default calling convention. */
3218 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
3220 if (TREE_CODE (type1) != FUNCTION_TYPE
3221 && TREE_CODE (type1) != METHOD_TYPE)
3224 /* Check for mismatched fastcall/regparm types. */
3225 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3226 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3227 || (ix86_function_regparm (type1, NULL)
3228 != ix86_function_regparm (type2, NULL)))
3231 /* Check for mismatched sseregparm types. */
3232 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3233 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3236 /* Check for mismatched return types (cdecl vs stdcall). */
3237 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3238 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3244 /* Return the regparm value for a function with the indicated TYPE and DECL.
3245 DECL may be NULL when calling function indirectly
3246 or considering a libcall. */
3249 ix86_function_regparm (const_tree type, const_tree decl)
3252 int regparm = ix86_regparm;
3254 static bool error_issued;
3259 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3263 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3265 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3267 /* We can't use regparm(3) for nested functions because
3268 these pass static chain pointer in %ecx register. */
3269 if (!error_issued && regparm == 3
3270 && decl_function_context (decl)
3271 && !DECL_NO_STATIC_CHAIN (decl))
3273 error ("nested functions are limited to 2 register parameters");
3274 error_issued = true;
3282 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3285 /* Use register calling convention for local functions when possible. */
3286 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3287 && flag_unit_at_a_time && !profile_flag)
3289 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3290 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3293 int local_regparm, globals = 0, regno;
3296 /* Make sure no regparm register is taken by a
3297 fixed register variable. */
3298 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3299 if (fixed_regs[local_regparm])
3302 /* We can't use regparm(3) for nested functions as these use
3303 static chain pointer in third argument. */
3304 if (local_regparm == 3
3305 && (decl_function_context (decl)
3306 || ix86_force_align_arg_pointer)
3307 && !DECL_NO_STATIC_CHAIN (decl))
3310 /* If the function realigns its stackpointer, the prologue will
3311 clobber %ecx. If we've already generated code for the callee,
3312 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3313 scanning the attributes for the self-realigning property. */
3314 f = DECL_STRUCT_FUNCTION (decl);
3315 if (local_regparm == 3
3316 && (f ? !!f->machine->force_align_arg_pointer
3317 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3318 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3321 /* Each fixed register usage increases register pressure,
3322 so less registers should be used for argument passing.
3323 This functionality can be overriden by an explicit
3325 for (regno = 0; regno <= DI_REG; regno++)
3326 if (fixed_regs[regno])
3330 = globals < local_regparm ? local_regparm - globals : 0;
3332 if (local_regparm > regparm)
3333 regparm = local_regparm;
3340 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3341 DFmode (2) arguments in SSE registers for a function with the
3342 indicated TYPE and DECL. DECL may be NULL when calling function
3343 indirectly or considering a libcall. Otherwise return 0. */
3346 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3348 gcc_assert (!TARGET_64BIT);
3350 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3351 by the sseregparm attribute. */
3352 if (TARGET_SSEREGPARM
3353 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3360 error ("Calling %qD with attribute sseregparm without "
3361 "SSE/SSE2 enabled", decl);
3363 error ("Calling %qT with attribute sseregparm without "
3364 "SSE/SSE2 enabled", type);
3372 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3373 (and DFmode for SSE2) arguments in SSE registers. */
3374 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3376 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3377 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3379 return TARGET_SSE2 ? 2 : 1;
3385 /* Return true if EAX is live at the start of the function. Used by
3386 ix86_expand_prologue to determine if we need special help before
3387 calling allocate_stack_worker. */
3390 ix86_eax_live_at_start_p (void)
3392 /* Cheat. Don't bother working forward from ix86_function_regparm
3393 to the function type to whether an actual argument is located in
3394 eax. Instead just look at cfg info, which is still close enough
3395 to correct at this point. This gives false positives for broken
3396 functions that might use uninitialized data that happens to be
3397 allocated in eax, but who cares? */
3398 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3401 /* Value is the number of bytes of arguments automatically
3402 popped when returning from a subroutine call.
3403 FUNDECL is the declaration node of the function (as a tree),
3404 FUNTYPE is the data type of the function (as a tree),
3405 or for a library call it is an identifier node for the subroutine name.
3406 SIZE is the number of bytes of arguments passed on the stack.
3408 On the 80386, the RTD insn may be used to pop them if the number
3409 of args is fixed, but if the number is variable then the caller
3410 must pop them all. RTD can't be used for library calls now
3411 because the library is compiled with the Unix compiler.
3412 Use of RTD is a selectable option, since it is incompatible with
3413 standard Unix calling sequences. If the option is not selected,
3414 the caller must always pop the args.
3416 The attribute stdcall is equivalent to RTD on a per module basis. */
3419 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3423 /* None of the 64-bit ABIs pop arguments. */
3427 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3429 /* Cdecl functions override -mrtd, and never pop the stack. */
3430 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3432 /* Stdcall and fastcall functions will pop the stack if not
3434 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3435 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3438 if (rtd && ! stdarg_p (funtype))
3442 /* Lose any fake structure return argument if it is passed on the stack. */
3443 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3444 && !KEEP_AGGREGATE_RETURN_POINTER)
3446 int nregs = ix86_function_regparm (funtype, fundecl);
3448 return GET_MODE_SIZE (Pmode);
3454 /* Argument support functions. */
3456 /* Return true when register may be used to pass function parameters. */
3458 ix86_function_arg_regno_p (int regno)
3461 const int *parm_regs;
3466 return (regno < REGPARM_MAX
3467 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3469 return (regno < REGPARM_MAX
3470 || (TARGET_MMX && MMX_REGNO_P (regno)
3471 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3472 || (TARGET_SSE && SSE_REGNO_P (regno)
3473 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3478 if (SSE_REGNO_P (regno) && TARGET_SSE)
3483 if (TARGET_SSE && SSE_REGNO_P (regno)
3484 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3488 /* RAX is used as hidden argument to va_arg functions. */
3489 if (!TARGET_64BIT_MS_ABI && regno == AX_REG)
3492 if (TARGET_64BIT_MS_ABI)
3493 parm_regs = x86_64_ms_abi_int_parameter_registers;
3495 parm_regs = x86_64_int_parameter_registers;
3496 for (i = 0; i < REGPARM_MAX; i++)
3497 if (regno == parm_regs[i])
3502 /* Return if we do not know how to pass TYPE solely in registers. */
3505 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3507 if (must_pass_in_stack_var_size_or_pad (mode, type))
3510 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3511 The layout_type routine is crafty and tries to trick us into passing
3512 currently unsupported vector types on the stack by using TImode. */
3513 return (!TARGET_64BIT && mode == TImode
3514 && type && TREE_CODE (type) != VECTOR_TYPE);
3517 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3518 for a call to a function whose data type is FNTYPE.
3519 For a library call, FNTYPE is 0. */
3522 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3523 tree fntype, /* tree ptr for function decl */
3524 rtx libname, /* SYMBOL_REF of library name or 0 */
3527 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3528 memset (cum, 0, sizeof (*cum));
3530 /* Set up the number of registers to use for passing arguments. */
3531 cum->nregs = ix86_regparm;
3533 cum->sse_nregs = SSE_REGPARM_MAX;
3535 cum->mmx_nregs = MMX_REGPARM_MAX;
3536 cum->warn_sse = true;
3537 cum->warn_mmx = true;
3539 /* Because type might mismatch in between caller and callee, we need to
3540 use actual type of function for local calls.
3541 FIXME: cgraph_analyze can be told to actually record if function uses
3542 va_start so for local functions maybe_vaarg can be made aggressive
3544 FIXME: once typesytem is fixed, we won't need this code anymore. */
3546 fntype = TREE_TYPE (fndecl);
3547 cum->maybe_vaarg = (fntype
3548 ? (!prototype_p (fntype) || stdarg_p (fntype))
3553 /* If there are variable arguments, then we won't pass anything
3554 in registers in 32-bit mode. */
3555 if (cum->maybe_vaarg)
3565 /* Use ecx and edx registers if function has fastcall attribute,
3566 else look for regparm information. */
3569 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3575 cum->nregs = ix86_function_regparm (fntype, fndecl);
3578 /* Set up the number of SSE registers used for passing SFmode
3579 and DFmode arguments. Warn for mismatching ABI. */
3580 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3584 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3585 But in the case of vector types, it is some vector mode.
3587 When we have only some of our vector isa extensions enabled, then there
3588 are some modes for which vector_mode_supported_p is false. For these
3589 modes, the generic vector support in gcc will choose some non-vector mode
3590 in order to implement the type. By computing the natural mode, we'll
3591 select the proper ABI location for the operand and not depend on whatever
3592 the middle-end decides to do with these vector types. */
3594 static enum machine_mode
3595 type_natural_mode (const_tree type)
3597 enum machine_mode mode = TYPE_MODE (type);
3599 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3601 HOST_WIDE_INT size = int_size_in_bytes (type);
3602 if ((size == 8 || size == 16)
3603 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3604 && TYPE_VECTOR_SUBPARTS (type) > 1)
3606 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3608 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3609 mode = MIN_MODE_VECTOR_FLOAT;
3611 mode = MIN_MODE_VECTOR_INT;
3613 /* Get the mode which has this inner mode and number of units. */
3614 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3615 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3616 && GET_MODE_INNER (mode) == innermode)
3626 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3627 this may not agree with the mode that the type system has chosen for the
3628 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3629 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3632 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3637 if (orig_mode != BLKmode)
3638 tmp = gen_rtx_REG (orig_mode, regno);
3641 tmp = gen_rtx_REG (mode, regno);
3642 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3643 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3649 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3650 of this code is to classify each 8bytes of incoming argument by the register
3651 class and assign registers accordingly. */
3653 /* Return the union class of CLASS1 and CLASS2.
3654 See the x86-64 PS ABI for details. */
3656 static enum x86_64_reg_class
3657 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3659 /* Rule #1: If both classes are equal, this is the resulting class. */
3660 if (class1 == class2)
3663 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3665 if (class1 == X86_64_NO_CLASS)
3667 if (class2 == X86_64_NO_CLASS)
3670 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3671 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3672 return X86_64_MEMORY_CLASS;
3674 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3675 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3676 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3677 return X86_64_INTEGERSI_CLASS;
3678 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3679 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3680 return X86_64_INTEGER_CLASS;
3682 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3684 if (class1 == X86_64_X87_CLASS
3685 || class1 == X86_64_X87UP_CLASS
3686 || class1 == X86_64_COMPLEX_X87_CLASS
3687 || class2 == X86_64_X87_CLASS
3688 || class2 == X86_64_X87UP_CLASS
3689 || class2 == X86_64_COMPLEX_X87_CLASS)
3690 return X86_64_MEMORY_CLASS;
3692 /* Rule #6: Otherwise class SSE is used. */
3693 return X86_64_SSE_CLASS;
3696 /* Classify the argument of type TYPE and mode MODE.
3697 CLASSES will be filled by the register class used to pass each word
3698 of the operand. The number of words is returned. In case the parameter
3699 should be passed in memory, 0 is returned. As a special case for zero
3700 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3702 BIT_OFFSET is used internally for handling records and specifies offset
3703 of the offset in bits modulo 256 to avoid overflow cases.
3705 See the x86-64 PS ABI for details.
3709 classify_argument (enum machine_mode mode, const_tree type,
3710 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3712 HOST_WIDE_INT bytes =
3713 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3714 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3716 /* Variable sized entities are always passed/returned in memory. */
3720 if (mode != VOIDmode
3721 && targetm.calls.must_pass_in_stack (mode, type))
3724 if (type && AGGREGATE_TYPE_P (type))
3728 enum x86_64_reg_class subclasses[MAX_CLASSES];
3730 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3734 for (i = 0; i < words; i++)
3735 classes[i] = X86_64_NO_CLASS;
3737 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3738 signalize memory class, so handle it as special case. */
3741 classes[0] = X86_64_NO_CLASS;
3745 /* Classify each field of record and merge classes. */
3746 switch (TREE_CODE (type))
3749 /* And now merge the fields of structure. */
3750 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3752 if (TREE_CODE (field) == FIELD_DECL)
3756 if (TREE_TYPE (field) == error_mark_node)
3759 /* Bitfields are always classified as integer. Handle them
3760 early, since later code would consider them to be
3761 misaligned integers. */
3762 if (DECL_BIT_FIELD (field))
3764 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3765 i < ((int_bit_position (field) + (bit_offset % 64))
3766 + tree_low_cst (DECL_SIZE (field), 0)
3769 merge_classes (X86_64_INTEGER_CLASS,
3774 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3775 TREE_TYPE (field), subclasses,
3776 (int_bit_position (field)
3777 + bit_offset) % 256);
3780 for (i = 0; i < num; i++)
3783 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3785 merge_classes (subclasses[i], classes[i + pos]);
3793 /* Arrays are handled as small records. */
3796 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3797 TREE_TYPE (type), subclasses, bit_offset);
3801 /* The partial classes are now full classes. */
3802 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3803 subclasses[0] = X86_64_SSE_CLASS;
3804 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3805 subclasses[0] = X86_64_INTEGER_CLASS;
3807 for (i = 0; i < words; i++)
3808 classes[i] = subclasses[i % num];
3813 case QUAL_UNION_TYPE:
3814 /* Unions are similar to RECORD_TYPE but offset is always 0.
3816 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3818 if (TREE_CODE (field) == FIELD_DECL)
3822 if (TREE_TYPE (field) == error_mark_node)
3825 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3826 TREE_TYPE (field), subclasses,
3830 for (i = 0; i < num; i++)
3831 classes[i] = merge_classes (subclasses[i], classes[i]);
3840 /* Final merger cleanup. */
3841 for (i = 0; i < words; i++)
3843 /* If one class is MEMORY, everything should be passed in
3845 if (classes[i] == X86_64_MEMORY_CLASS)
3848 /* The X86_64_SSEUP_CLASS should be always preceded by
3849 X86_64_SSE_CLASS. */
3850 if (classes[i] == X86_64_SSEUP_CLASS
3851 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3852 classes[i] = X86_64_SSE_CLASS;
3854 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3855 if (classes[i] == X86_64_X87UP_CLASS
3856 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3857 classes[i] = X86_64_SSE_CLASS;
3862 /* Compute alignment needed. We align all types to natural boundaries with
3863 exception of XFmode that is aligned to 64bits. */
3864 if (mode != VOIDmode && mode != BLKmode)
3866 int mode_alignment = GET_MODE_BITSIZE (mode);
3869 mode_alignment = 128;
3870 else if (mode == XCmode)
3871 mode_alignment = 256;
3872 if (COMPLEX_MODE_P (mode))
3873 mode_alignment /= 2;
3874 /* Misaligned fields are always returned in memory. */
3875 if (bit_offset % mode_alignment)
3879 /* for V1xx modes, just use the base mode */
3880 if (VECTOR_MODE_P (mode) && mode != V1DImode
3881 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3882 mode = GET_MODE_INNER (mode);
3884 /* Classification of atomic types. */
3889 classes[0] = X86_64_SSE_CLASS;
3892 classes[0] = X86_64_SSE_CLASS;
3893 classes[1] = X86_64_SSEUP_CLASS;
3902 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3903 classes[0] = X86_64_INTEGERSI_CLASS;
3905 classes[0] = X86_64_INTEGER_CLASS;
3909 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3914 if (!(bit_offset % 64))
3915 classes[0] = X86_64_SSESF_CLASS;
3917 classes[0] = X86_64_SSE_CLASS;
3920 classes[0] = X86_64_SSEDF_CLASS;
3923 classes[0] = X86_64_X87_CLASS;
3924 classes[1] = X86_64_X87UP_CLASS;
3927 classes[0] = X86_64_SSE_CLASS;
3928 classes[1] = X86_64_SSEUP_CLASS;
3931 classes[0] = X86_64_SSE_CLASS;
3934 classes[0] = X86_64_SSEDF_CLASS;
3935 classes[1] = X86_64_SSEDF_CLASS;
3938 classes[0] = X86_64_COMPLEX_X87_CLASS;
3941 /* This modes is larger than 16 bytes. */
3949 classes[0] = X86_64_SSE_CLASS;
3950 classes[1] = X86_64_SSEUP_CLASS;
3957 classes[0] = X86_64_SSE_CLASS;
3963 gcc_assert (VECTOR_MODE_P (mode));
3968 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3970 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3971 classes[0] = X86_64_INTEGERSI_CLASS;
3973 classes[0] = X86_64_INTEGER_CLASS;
3974 classes[1] = X86_64_INTEGER_CLASS;
3975 return 1 + (bytes > 8);
3979 /* Examine the argument and return set number of register required in each
3980 class. Return 0 iff parameter should be passed in memory. */
3982 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3983 int *int_nregs, int *sse_nregs)
3985 enum x86_64_reg_class regclass[MAX_CLASSES];
3986 int n = classify_argument (mode, type, regclass, 0);
3992 for (n--; n >= 0; n--)
3993 switch (regclass[n])
3995 case X86_64_INTEGER_CLASS:
3996 case X86_64_INTEGERSI_CLASS:
3999 case X86_64_SSE_CLASS:
4000 case X86_64_SSESF_CLASS:
4001 case X86_64_SSEDF_CLASS:
4004 case X86_64_NO_CLASS:
4005 case X86_64_SSEUP_CLASS:
4007 case X86_64_X87_CLASS:
4008 case X86_64_X87UP_CLASS:
4012 case X86_64_COMPLEX_X87_CLASS:
4013 return in_return ? 2 : 0;
4014 case X86_64_MEMORY_CLASS:
4020 /* Construct container for the argument used by GCC interface. See
4021 FUNCTION_ARG for the detailed description. */
4024 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4025 const_tree type, int in_return, int nintregs, int nsseregs,
4026 const int *intreg, int sse_regno)
4028 /* The following variables hold the static issued_error state. */
4029 static bool issued_sse_arg_error;
4030 static bool issued_sse_ret_error;
4031 static bool issued_x87_ret_error;
4033 enum machine_mode tmpmode;
4035 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4036 enum x86_64_reg_class regclass[MAX_CLASSES];
4040 int needed_sseregs, needed_intregs;
4041 rtx exp[MAX_CLASSES];
4044 n = classify_argument (mode, type, regclass, 0);
4047 if (!examine_argument (mode, type, in_return, &needed_intregs,
4050 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4053 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4054 some less clueful developer tries to use floating-point anyway. */
4055 if (needed_sseregs && !TARGET_SSE)
4059 if (!issued_sse_ret_error)
4061 error ("SSE register return with SSE disabled");
4062 issued_sse_ret_error = true;
4065 else if (!issued_sse_arg_error)
4067 error ("SSE register argument with SSE disabled");
4068 issued_sse_arg_error = true;
4073 /* Likewise, error if the ABI requires us to return values in the
4074 x87 registers and the user specified -mno-80387. */
4075 if (!TARGET_80387 && in_return)
4076 for (i = 0; i < n; i++)
4077 if (regclass[i] == X86_64_X87_CLASS
4078 || regclass[i] == X86_64_X87UP_CLASS
4079 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4081 if (!issued_x87_ret_error)
4083 error ("x87 register return with x87 disabled");
4084 issued_x87_ret_error = true;
4089 /* First construct simple cases. Avoid SCmode, since we want to use
4090 single register to pass this type. */
4091 if (n == 1 && mode != SCmode)
4092 switch (regclass[0])
4094 case X86_64_INTEGER_CLASS:
4095 case X86_64_INTEGERSI_CLASS:
4096 return gen_rtx_REG (mode, intreg[0]);
4097 case X86_64_SSE_CLASS:
4098 case X86_64_SSESF_CLASS:
4099 case X86_64_SSEDF_CLASS:
4100 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4101 case X86_64_X87_CLASS:
4102 case X86_64_COMPLEX_X87_CLASS:
4103 return gen_rtx_REG (mode, FIRST_STACK_REG);
4104 case X86_64_NO_CLASS:
4105 /* Zero sized array, struct or class. */
4110 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4111 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4112 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4115 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4116 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4117 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4118 && regclass[1] == X86_64_INTEGER_CLASS
4119 && (mode == CDImode || mode == TImode || mode == TFmode)
4120 && intreg[0] + 1 == intreg[1])
4121 return gen_rtx_REG (mode, intreg[0]);
4123 /* Otherwise figure out the entries of the PARALLEL. */
4124 for (i = 0; i < n; i++)
4126 switch (regclass[i])
4128 case X86_64_NO_CLASS:
4130 case X86_64_INTEGER_CLASS:
4131 case X86_64_INTEGERSI_CLASS:
4132 /* Merge TImodes on aligned occasions here too. */
4133 if (i * 8 + 8 > bytes)
4134 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4135 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4139 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4140 if (tmpmode == BLKmode)
4142 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4143 gen_rtx_REG (tmpmode, *intreg),
4147 case X86_64_SSESF_CLASS:
4148 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4149 gen_rtx_REG (SFmode,
4150 SSE_REGNO (sse_regno)),
4154 case X86_64_SSEDF_CLASS:
4155 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4156 gen_rtx_REG (DFmode,
4157 SSE_REGNO (sse_regno)),
4161 case X86_64_SSE_CLASS:
4162 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4166 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4167 gen_rtx_REG (tmpmode,
4168 SSE_REGNO (sse_regno)),
4170 if (tmpmode == TImode)
4179 /* Empty aligned struct, union or class. */
4183 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4184 for (i = 0; i < nexps; i++)
4185 XVECEXP (ret, 0, i) = exp [i];
4189 /* Update the data in CUM to advance over an argument of mode MODE
4190 and data type TYPE. (TYPE is null for libcalls where that information
4191 may not be available.) */
4194 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4195 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4211 cum->words += words;
4212 cum->nregs -= words;
4213 cum->regno += words;
4215 if (cum->nregs <= 0)
4223 if (cum->float_in_sse < 2)
4226 if (cum->float_in_sse < 1)
4237 if (!type || !AGGREGATE_TYPE_P (type))
4239 cum->sse_words += words;
4240 cum->sse_nregs -= 1;
4241 cum->sse_regno += 1;
4242 if (cum->sse_nregs <= 0)
4255 if (!type || !AGGREGATE_TYPE_P (type))
4257 cum->mmx_words += words;
4258 cum->mmx_nregs -= 1;
4259 cum->mmx_regno += 1;
4260 if (cum->mmx_nregs <= 0)
4271 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4272 tree type, HOST_WIDE_INT words)
4274 int int_nregs, sse_nregs;
4276 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4277 cum->words += words;
4278 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4280 cum->nregs -= int_nregs;
4281 cum->sse_nregs -= sse_nregs;
4282 cum->regno += int_nregs;
4283 cum->sse_regno += sse_nregs;
4286 cum->words += words;
4290 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4291 HOST_WIDE_INT words)
4293 /* Otherwise, this should be passed indirect. */
4294 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4296 cum->words += words;
4305 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4306 tree type, int named ATTRIBUTE_UNUSED)
4308 HOST_WIDE_INT bytes, words;
4310 if (mode == BLKmode)
4311 bytes = int_size_in_bytes (type);
4313 bytes = GET_MODE_SIZE (mode);
4314 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4317 mode = type_natural_mode (type);
4319 if (TARGET_64BIT_MS_ABI)
4320 function_arg_advance_ms_64 (cum, bytes, words);
4321 else if (TARGET_64BIT)
4322 function_arg_advance_64 (cum, mode, type, words);
4324 function_arg_advance_32 (cum, mode, type, bytes, words);
4327 /* Define where to put the arguments to a function.
4328 Value is zero to push the argument on the stack,
4329 or a hard register in which to store the argument.
4331 MODE is the argument's machine mode.
4332 TYPE is the data type of the argument (as a tree).
4333 This is null for libcalls where that information may
4335 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4336 the preceding args and about the function being called.
4337 NAMED is nonzero if this argument is a named parameter
4338 (otherwise it is an extra parameter matching an ellipsis). */
4341 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4342 enum machine_mode orig_mode, tree type,
4343 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4345 static bool warnedsse, warnedmmx;
4347 /* Avoid the AL settings for the Unix64 ABI. */
4348 if (mode == VOIDmode)
4364 if (words <= cum->nregs)
4366 int regno = cum->regno;
4368 /* Fastcall allocates the first two DWORD (SImode) or
4369 smaller arguments to ECX and EDX if it isn't an
4375 || (type && AGGREGATE_TYPE_P (type)))
4378 /* ECX not EAX is the first allocated register. */
4379 if (regno == AX_REG)
4382 return gen_rtx_REG (mode, regno);
4387 if (cum->float_in_sse < 2)
4390 if (cum->float_in_sse < 1)
4400 if (!type || !AGGREGATE_TYPE_P (type))
4402 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4405 warning (0, "SSE vector argument without SSE enabled "
4409 return gen_reg_or_parallel (mode, orig_mode,
4410 cum->sse_regno + FIRST_SSE_REG);
4419 if (!type || !AGGREGATE_TYPE_P (type))
4421 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4424 warning (0, "MMX vector argument without MMX enabled "
4428 return gen_reg_or_parallel (mode, orig_mode,
4429 cum->mmx_regno + FIRST_MMX_REG);
4438 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4439 enum machine_mode orig_mode, tree type)
4441 /* Handle a hidden AL argument containing number of registers
4442 for varargs x86-64 functions. */
4443 if (mode == VOIDmode)
4444 return GEN_INT (cum->maybe_vaarg
4445 ? (cum->sse_nregs < 0
4450 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4452 &x86_64_int_parameter_registers [cum->regno],
4457 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4458 enum machine_mode orig_mode, int named,
4459 HOST_WIDE_INT bytes)
4463 /* Avoid the AL settings for the Unix64 ABI. */
4464 if (mode == VOIDmode)
4467 /* If we've run out of registers, it goes on the stack. */
4468 if (cum->nregs == 0)
4471 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4473 /* Only floating point modes are passed in anything but integer regs. */
4474 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4477 regno = cum->regno + FIRST_SSE_REG;
4482 /* Unnamed floating parameters are passed in both the
4483 SSE and integer registers. */
4484 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4485 t2 = gen_rtx_REG (mode, regno);
4486 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4487 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4488 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4491 /* Handle aggregated types passed in register. */
4492 if (orig_mode == BLKmode)
4494 if (bytes > 0 && bytes <= 8)
4495 mode = (bytes > 4 ? DImode : SImode);
4496 if (mode == BLKmode)
4500 return gen_reg_or_parallel (mode, orig_mode, regno);
4504 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4505 tree type, int named)
4507 enum machine_mode mode = omode;
4508 HOST_WIDE_INT bytes, words;
4510 if (mode == BLKmode)
4511 bytes = int_size_in_bytes (type);
4513 bytes = GET_MODE_SIZE (mode);
4514 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4516 /* To simplify the code below, represent vector types with a vector mode
4517 even if MMX/SSE are not active. */
4518 if (type && TREE_CODE (type) == VECTOR_TYPE)
4519 mode = type_natural_mode (type);
4521 if (TARGET_64BIT_MS_ABI)
4522 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4523 else if (TARGET_64BIT)
4524 return function_arg_64 (cum, mode, omode, type);
4526 return function_arg_32 (cum, mode, omode, type, bytes, words);
4529 /* A C expression that indicates when an argument must be passed by
4530 reference. If nonzero for an argument, a copy of that argument is
4531 made in memory and a pointer to the argument is passed instead of
4532 the argument itself. The pointer is passed in whatever way is
4533 appropriate for passing a pointer to that type. */
4536 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4537 enum machine_mode mode ATTRIBUTE_UNUSED,
4538 const_tree type, bool named ATTRIBUTE_UNUSED)
4540 /* See Windows x64 Software Convention. */
4541 if (TARGET_64BIT_MS_ABI)
4543 int msize = (int) GET_MODE_SIZE (mode);
4546 /* Arrays are passed by reference. */
4547 if (TREE_CODE (type) == ARRAY_TYPE)
4550 if (AGGREGATE_TYPE_P (type))
4552 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4553 are passed by reference. */
4554 msize = int_size_in_bytes (type);
4558 /* __m128 is passed by reference. */
4560 case 1: case 2: case 4: case 8:
4566 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4572 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4573 ABI. Only called if TARGET_SSE. */
4575 contains_128bit_aligned_vector_p (tree type)
4577 enum machine_mode mode = TYPE_MODE (type);
4578 if (SSE_REG_MODE_P (mode)
4579 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4581 if (TYPE_ALIGN (type) < 128)
4584 if (AGGREGATE_TYPE_P (type))
4586 /* Walk the aggregates recursively. */
4587 switch (TREE_CODE (type))
4591 case QUAL_UNION_TYPE:
4595 /* Walk all the structure fields. */
4596 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4598 if (TREE_CODE (field) == FIELD_DECL
4599 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4606 /* Just for use if some languages passes arrays by value. */
4607 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4618 /* Gives the alignment boundary, in bits, of an argument with the
4619 specified mode and type. */
4622 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4626 align = TYPE_ALIGN (type);
4628 align = GET_MODE_ALIGNMENT (mode);
4629 if (align < PARM_BOUNDARY)
4630 align = PARM_BOUNDARY;
4631 /* Decimal floating point is aligned to its natural boundary. */
4632 if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
4634 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4635 make an exception for SSE modes since these require 128bit
4638 The handling here differs from field_alignment. ICC aligns MMX
4639 arguments to 4 byte boundaries, while structure fields are aligned
4640 to 8 byte boundaries. */
4642 align = PARM_BOUNDARY;
4645 if (!SSE_REG_MODE_P (mode))
4646 align = PARM_BOUNDARY;
4650 if (!contains_128bit_aligned_vector_p (type))
4651 align = PARM_BOUNDARY;
4654 if (align > BIGGEST_ALIGNMENT)
4655 align = BIGGEST_ALIGNMENT;
4659 /* Return true if N is a possible register number of function value. */
4662 ix86_function_value_regno_p (int regno)
4669 case FIRST_FLOAT_REG:
4670 if (TARGET_64BIT_MS_ABI)
4672 return TARGET_FLOAT_RETURNS_IN_80387;
4678 if (TARGET_MACHO || TARGET_64BIT)
4686 /* Define how to find the value returned by a function.
4687 VALTYPE is the data type of the value (as a tree).
4688 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4689 otherwise, FUNC is 0. */
4692 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4693 const_tree fntype, const_tree fn)
4697 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4698 we normally prevent this case when mmx is not available. However
4699 some ABIs may require the result to be returned like DImode. */
4700 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4701 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4703 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4704 we prevent this case when sse is not available. However some ABIs
4705 may require the result to be returned like integer TImode. */
4706 else if (mode == TImode
4707 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4708 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4710 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4711 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4712 regno = FIRST_FLOAT_REG;
4714 /* Most things go in %eax. */
4717 /* Override FP return register with %xmm0 for local functions when
4718 SSE math is enabled or for functions with sseregparm attribute. */
4719 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4721 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4722 if ((sse_level >= 1 && mode == SFmode)
4723 || (sse_level == 2 && mode == DFmode))
4724 regno = FIRST_SSE_REG;
4727 return gen_rtx_REG (orig_mode, regno);
4731 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4736 /* Handle libcalls, which don't provide a type node. */
4737 if (valtype == NULL)
4749 return gen_rtx_REG (mode, FIRST_SSE_REG);
4752 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4756 return gen_rtx_REG (mode, AX_REG);
4760 ret = construct_container (mode, orig_mode, valtype, 1,
4761 REGPARM_MAX, SSE_REGPARM_MAX,
4762 x86_64_int_return_registers, 0);
4764 /* For zero sized structures, construct_container returns NULL, but we
4765 need to keep rest of compiler happy by returning meaningful value. */
4767 ret = gen_rtx_REG (orig_mode, AX_REG);
4773 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4775 unsigned int regno = AX_REG;
4779 switch (GET_MODE_SIZE (mode))
4782 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4783 && !COMPLEX_MODE_P (mode))
4784 regno = FIRST_SSE_REG;
4788 if (mode == SFmode || mode == DFmode)
4789 regno = FIRST_SSE_REG;
4795 return gen_rtx_REG (orig_mode, regno);
4799 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4800 enum machine_mode orig_mode, enum machine_mode mode)
4802 const_tree fn, fntype;
4805 if (fntype_or_decl && DECL_P (fntype_or_decl))
4806 fn = fntype_or_decl;
4807 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4809 if (TARGET_64BIT_MS_ABI)
4810 return function_value_ms_64 (orig_mode, mode);
4811 else if (TARGET_64BIT)
4812 return function_value_64 (orig_mode, mode, valtype);
4814 return function_value_32 (orig_mode, mode, fntype, fn);
4818 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4819 bool outgoing ATTRIBUTE_UNUSED)
4821 enum machine_mode mode, orig_mode;
4823 orig_mode = TYPE_MODE (valtype);
4824 mode = type_natural_mode (valtype);
4825 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4829 ix86_libcall_value (enum machine_mode mode)
4831 return ix86_function_value_1 (NULL, NULL, mode, mode);
4834 /* Return true iff type is returned in memory. */
4837 return_in_memory_32 (const_tree type, enum machine_mode mode)
4841 if (mode == BLKmode)
4844 size = int_size_in_bytes (type);
4846 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4849 if (VECTOR_MODE_P (mode) || mode == TImode)
4851 /* User-created vectors small enough to fit in EAX. */
4855 /* MMX/3dNow values are returned in MM0,
4856 except when it doesn't exits. */
4858 return (TARGET_MMX ? 0 : 1);
4860 /* SSE values are returned in XMM0, except when it doesn't exist. */
4862 return (TARGET_SSE ? 0 : 1);
4877 return_in_memory_64 (const_tree type, enum machine_mode mode)
4879 int needed_intregs, needed_sseregs;
4880 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4884 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4886 HOST_WIDE_INT size = int_size_in_bytes (type);
4888 /* __m128 is returned in xmm0. */
4889 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4890 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
4893 /* Otherwise, the size must be exactly in [1248]. */
4894 return (size != 1 && size != 2 && size != 4 && size != 8);
4898 ix86_return_in_memory (const_tree type)
4900 const enum machine_mode mode = type_natural_mode (type);
4902 if (TARGET_64BIT_MS_ABI)
4903 return return_in_memory_ms_64 (type, mode);
4904 else if (TARGET_64BIT)
4905 return return_in_memory_64 (type, mode);
4907 return return_in_memory_32 (type, mode);
4910 /* Return false iff TYPE is returned in memory. This version is used
4911 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4912 but differs notably in that when MMX is available, 8-byte vectors
4913 are returned in memory, rather than in MMX registers. */
4916 ix86_sol10_return_in_memory (const_tree type)
4919 enum machine_mode mode = type_natural_mode (type);
4922 return return_in_memory_64 (type, mode);
4924 if (mode == BLKmode)
4927 size = int_size_in_bytes (type);
4929 if (VECTOR_MODE_P (mode))
4931 /* Return in memory only if MMX registers *are* available. This
4932 seems backwards, but it is consistent with the existing
4939 else if (mode == TImode)
4941 else if (mode == XFmode)
4947 /* When returning SSE vector types, we have a choice of either
4948 (1) being abi incompatible with a -march switch, or
4949 (2) generating an error.
4950 Given no good solution, I think the safest thing is one warning.
4951 The user won't be able to use -Werror, but....
4953 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4954 called in response to actually generating a caller or callee that
4955 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4956 via aggregate_value_p for general type probing from tree-ssa. */
4959 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4961 static bool warnedsse, warnedmmx;
4963 if (!TARGET_64BIT && type)
4965 /* Look at the return type of the function, not the function type. */
4966 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4968 if (!TARGET_SSE && !warnedsse)
4971 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4974 warning (0, "SSE vector return without SSE enabled "
4979 if (!TARGET_MMX && !warnedmmx)
4981 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4984 warning (0, "MMX vector return without MMX enabled "
4994 /* Create the va_list data type. */
4997 ix86_build_builtin_va_list (void)
4999 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5001 /* For i386 we use plain pointer to argument area. */
5002 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5003 return build_pointer_type (char_type_node);
5005 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5006 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5008 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5009 unsigned_type_node);
5010 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5011 unsigned_type_node);
5012 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5014 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5017 va_list_gpr_counter_field = f_gpr;
5018 va_list_fpr_counter_field = f_fpr;
5020 DECL_FIELD_CONTEXT (f_gpr) = record;
5021 DECL_FIELD_CONTEXT (f_fpr) = record;
5022 DECL_FIELD_CONTEXT (f_ovf) = record;
5023 DECL_FIELD_CONTEXT (f_sav) = record;
5025 TREE_CHAIN (record) = type_decl;
5026 TYPE_NAME (record) = type_decl;
5027 TYPE_FIELDS (record) = f_gpr;
5028 TREE_CHAIN (f_gpr) = f_fpr;
5029 TREE_CHAIN (f_fpr) = f_ovf;
5030 TREE_CHAIN (f_ovf) = f_sav;
5032 layout_type (record);
5034 /* The correct type is an array type of one element. */
5035 return build_array_type (record, build_index_type (size_zero_node));
5038 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5041 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5051 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5054 /* Indicate to allocate space on the stack for varargs save area. */
5055 ix86_save_varrargs_registers = 1;
5056 /* We need 16-byte stack alignment to save SSE registers. If user
5057 asked for lower preferred_stack_boundary, lets just hope that he knows
5058 what he is doing and won't varargs SSE values.
5060 We also may end up assuming that only 64bit values are stored in SSE
5061 register let some floating point program work. */
5062 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5063 cfun->stack_alignment_needed = BIGGEST_ALIGNMENT;
5065 save_area = frame_pointer_rtx;
5066 set = get_varargs_alias_set ();
5068 for (i = cum->regno;
5070 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5073 mem = gen_rtx_MEM (Pmode,
5074 plus_constant (save_area, i * UNITS_PER_WORD));
5075 MEM_NOTRAP_P (mem) = 1;
5076 set_mem_alias_set (mem, set);
5077 emit_move_insn (mem, gen_rtx_REG (Pmode,
5078 x86_64_int_parameter_registers[i]));
5081 if (cum->sse_nregs && cfun->va_list_fpr_size)
5083 /* Now emit code to save SSE registers. The AX parameter contains number
5084 of SSE parameter registers used to call this function. We use
5085 sse_prologue_save insn template that produces computed jump across
5086 SSE saves. We need some preparation work to get this working. */
5088 label = gen_label_rtx ();
5089 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5091 /* Compute address to jump to :
5092 label - 5*eax + nnamed_sse_arguments*5 */
5093 tmp_reg = gen_reg_rtx (Pmode);
5094 nsse_reg = gen_reg_rtx (Pmode);
5095 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5096 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5097 gen_rtx_MULT (Pmode, nsse_reg,
5102 gen_rtx_CONST (DImode,
5103 gen_rtx_PLUS (DImode,
5105 GEN_INT (cum->sse_regno * 4))));
5107 emit_move_insn (nsse_reg, label_ref);
5108 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5110 /* Compute address of memory block we save into. We always use pointer
5111 pointing 127 bytes after first byte to store - this is needed to keep
5112 instruction size limited by 4 bytes. */
5113 tmp_reg = gen_reg_rtx (Pmode);
5114 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5115 plus_constant (save_area,
5116 8 * REGPARM_MAX + 127)));
5117 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5118 MEM_NOTRAP_P (mem) = 1;
5119 set_mem_alias_set (mem, set);
5120 set_mem_align (mem, BITS_PER_WORD);
5122 /* And finally do the dirty job! */
5123 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5124 GEN_INT (cum->sse_regno), label));
5129 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5131 alias_set_type set = get_varargs_alias_set ();
5134 for (i = cum->regno; i < REGPARM_MAX; i++)
5138 mem = gen_rtx_MEM (Pmode,
5139 plus_constant (virtual_incoming_args_rtx,
5140 i * UNITS_PER_WORD));
5141 MEM_NOTRAP_P (mem) = 1;
5142 set_mem_alias_set (mem, set);
5144 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5145 emit_move_insn (mem, reg);
5150 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5151 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5154 CUMULATIVE_ARGS next_cum;
5157 /* This argument doesn't appear to be used anymore. Which is good,
5158 because the old code here didn't suppress rtl generation. */
5159 gcc_assert (!no_rtl);
5164 fntype = TREE_TYPE (current_function_decl);
5166 /* For varargs, we do not want to skip the dummy va_dcl argument.
5167 For stdargs, we do want to skip the last named argument. */
5169 if (stdarg_p (fntype))
5170 function_arg_advance (&next_cum, mode, type, 1);
5172 if (TARGET_64BIT_MS_ABI)
5173 setup_incoming_varargs_ms_64 (&next_cum);
5175 setup_incoming_varargs_64 (&next_cum);
5178 /* Implement va_start. */
5181 ix86_va_start (tree valist, rtx nextarg)
5183 HOST_WIDE_INT words, n_gpr, n_fpr;
5184 tree f_gpr, f_fpr, f_ovf, f_sav;
5185 tree gpr, fpr, ovf, sav, t;
5188 /* Only 64bit target needs something special. */
5189 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5191 std_expand_builtin_va_start (valist, nextarg);
5195 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5196 f_fpr = TREE_CHAIN (f_gpr);
5197 f_ovf = TREE_CHAIN (f_fpr);
5198 f_sav = TREE_CHAIN (f_ovf);
5200 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5201 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5202 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5203 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5204 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5206 /* Count number of gp and fp argument registers used. */
5207 words = crtl->args.info.words;
5208 n_gpr = crtl->args.info.regno;
5209 n_fpr = crtl->args.info.sse_regno;
5211 if (cfun->va_list_gpr_size)
5213 type = TREE_TYPE (gpr);
5214 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5215 build_int_cst (type, n_gpr * 8));
5216 TREE_SIDE_EFFECTS (t) = 1;
5217 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5220 if (cfun->va_list_fpr_size)
5222 type = TREE_TYPE (fpr);
5223 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5224 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
5225 TREE_SIDE_EFFECTS (t) = 1;
5226 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5229 /* Find the overflow area. */
5230 type = TREE_TYPE (ovf);
5231 t = make_tree (type, virtual_incoming_args_rtx);
5233 t = build2 (POINTER_PLUS_EXPR, type, t,
5234 size_int (words * UNITS_PER_WORD));
5235 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5236 TREE_SIDE_EFFECTS (t) = 1;
5237 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5239 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5241 /* Find the register save area.
5242 Prologue of the function save it right above stack frame. */
5243 type = TREE_TYPE (sav);
5244 t = make_tree (type, frame_pointer_rtx);
5245 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5246 TREE_SIDE_EFFECTS (t) = 1;
5247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5251 /* Implement va_arg. */
5254 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5256 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5257 tree f_gpr, f_fpr, f_ovf, f_sav;
5258 tree gpr, fpr, ovf, sav, t;
5260 tree lab_false, lab_over = NULL_TREE;
5265 enum machine_mode nat_mode;
5267 /* Only 64bit target needs something special. */
5268 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
5269 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5271 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5272 f_fpr = TREE_CHAIN (f_gpr);
5273 f_ovf = TREE_CHAIN (f_fpr);
5274 f_sav = TREE_CHAIN (f_ovf);
5276 valist = build_va_arg_indirect_ref (valist);
5277 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5278 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5279 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5280 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5282 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5284 type = build_pointer_type (type);
5285 size = int_size_in_bytes (type);
5286 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5288 nat_mode = type_natural_mode (type);
5289 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5290 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
5292 /* Pull the value out of the saved registers. */
5294 addr = create_tmp_var (ptr_type_node, "addr");
5295 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5299 int needed_intregs, needed_sseregs;
5301 tree int_addr, sse_addr;
5303 lab_false = create_artificial_label ();
5304 lab_over = create_artificial_label ();
5306 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5308 need_temp = (!REG_P (container)
5309 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5310 || TYPE_ALIGN (type) > 128));
5312 /* In case we are passing structure, verify that it is consecutive block
5313 on the register save area. If not we need to do moves. */
5314 if (!need_temp && !REG_P (container))
5316 /* Verify that all registers are strictly consecutive */
5317 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5321 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5323 rtx slot = XVECEXP (container, 0, i);
5324 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5325 || INTVAL (XEXP (slot, 1)) != i * 16)
5333 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5335 rtx slot = XVECEXP (container, 0, i);
5336 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5337 || INTVAL (XEXP (slot, 1)) != i * 8)
5349 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5350 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5351 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5352 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5355 /* First ensure that we fit completely in registers. */
5358 t = build_int_cst (TREE_TYPE (gpr),
5359 (REGPARM_MAX - needed_intregs + 1) * 8);
5360 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5361 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5362 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5363 gimplify_and_add (t, pre_p);
5367 t = build_int_cst (TREE_TYPE (fpr),
5368 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5370 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5371 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5372 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5373 gimplify_and_add (t, pre_p);
5376 /* Compute index to start of area used for integer regs. */
5379 /* int_addr = gpr + sav; */
5380 t = fold_convert (sizetype, gpr);
5381 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5382 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5383 gimplify_and_add (t, pre_p);
5387 /* sse_addr = fpr + sav; */
5388 t = fold_convert (sizetype, fpr);
5389 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5390 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5391 gimplify_and_add (t, pre_p);
5396 tree temp = create_tmp_var (type, "va_arg_tmp");
5399 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5400 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5401 gimplify_and_add (t, pre_p);
5403 for (i = 0; i < XVECLEN (container, 0); i++)
5405 rtx slot = XVECEXP (container, 0, i);
5406 rtx reg = XEXP (slot, 0);
5407 enum machine_mode mode = GET_MODE (reg);
5408 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5409 tree addr_type = build_pointer_type (piece_type);
5412 tree dest_addr, dest;
5414 if (SSE_REGNO_P (REGNO (reg)))
5416 src_addr = sse_addr;
5417 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5421 src_addr = int_addr;
5422 src_offset = REGNO (reg) * 8;
5424 src_addr = fold_convert (addr_type, src_addr);
5425 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5426 size_int (src_offset));
5427 src = build_va_arg_indirect_ref (src_addr);
5429 dest_addr = fold_convert (addr_type, addr);
5430 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5431 size_int (INTVAL (XEXP (slot, 1))));
5432 dest = build_va_arg_indirect_ref (dest_addr);
5434 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5435 gimplify_and_add (t, pre_p);
5441 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5442 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5443 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5444 gimplify_and_add (t, pre_p);
5448 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5449 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5450 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5451 gimplify_and_add (t, pre_p);
5454 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5455 gimplify_and_add (t, pre_p);
5457 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5458 append_to_statement_list (t, pre_p);
5461 /* ... otherwise out of the overflow area. */
5463 /* Care for on-stack alignment if needed. */
5464 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5465 || integer_zerop (TYPE_SIZE (type)))
5469 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5470 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5471 size_int (align - 1));
5472 t = fold_convert (sizetype, t);
5473 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5475 t = fold_convert (TREE_TYPE (ovf), t);
5477 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5479 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5480 gimplify_and_add (t2, pre_p);
5482 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5483 size_int (rsize * UNITS_PER_WORD));
5484 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5485 gimplify_and_add (t, pre_p);
5489 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5490 append_to_statement_list (t, pre_p);
5493 ptrtype = build_pointer_type (type);
5494 addr = fold_convert (ptrtype, addr);
5497 addr = build_va_arg_indirect_ref (addr);
5498 return build_va_arg_indirect_ref (addr);
5501 /* Return nonzero if OPNUM's MEM should be matched
5502 in movabs* patterns. */
5505 ix86_check_movabs (rtx insn, int opnum)
5509 set = PATTERN (insn);
5510 if (GET_CODE (set) == PARALLEL)
5511 set = XVECEXP (set, 0, 0);
5512 gcc_assert (GET_CODE (set) == SET);
5513 mem = XEXP (set, opnum);
5514 while (GET_CODE (mem) == SUBREG)
5515 mem = SUBREG_REG (mem);
5516 gcc_assert (MEM_P (mem));
5517 return (volatile_ok || !MEM_VOLATILE_P (mem));
5520 /* Initialize the table of extra 80387 mathematical constants. */
5523 init_ext_80387_constants (void)
5525 static const char * cst[5] =
5527 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5528 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5529 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5530 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5531 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5535 for (i = 0; i < 5; i++)
5537 real_from_string (&ext_80387_constants_table[i], cst[i]);
5538 /* Ensure each constant is rounded to XFmode precision. */
5539 real_convert (&ext_80387_constants_table[i],
5540 XFmode, &ext_80387_constants_table[i]);
5543 ext_80387_constants_init = 1;
5546 /* Return true if the constant is something that can be loaded with
5547 a special instruction. */
5550 standard_80387_constant_p (rtx x)
5552 enum machine_mode mode = GET_MODE (x);
5556 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5559 if (x == CONST0_RTX (mode))
5561 if (x == CONST1_RTX (mode))
5564 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5566 /* For XFmode constants, try to find a special 80387 instruction when
5567 optimizing for size or on those CPUs that benefit from them. */
5569 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5573 if (! ext_80387_constants_init)
5574 init_ext_80387_constants ();
5576 for (i = 0; i < 5; i++)
5577 if (real_identical (&r, &ext_80387_constants_table[i]))
5581 /* Load of the constant -0.0 or -1.0 will be split as
5582 fldz;fchs or fld1;fchs sequence. */
5583 if (real_isnegzero (&r))
5585 if (real_identical (&r, &dconstm1))
5591 /* Return the opcode of the special instruction to be used to load
5595 standard_80387_constant_opcode (rtx x)
5597 switch (standard_80387_constant_p (x))
5621 /* Return the CONST_DOUBLE representing the 80387 constant that is
5622 loaded by the specified special instruction. The argument IDX
5623 matches the return value from standard_80387_constant_p. */
5626 standard_80387_constant_rtx (int idx)
5630 if (! ext_80387_constants_init)
5631 init_ext_80387_constants ();
5647 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5651 /* Return 1 if mode is a valid mode for sse. */
5653 standard_sse_mode_p (enum machine_mode mode)
5670 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5673 standard_sse_constant_p (rtx x)
5675 enum machine_mode mode = GET_MODE (x);
5677 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5679 if (vector_all_ones_operand (x, mode)
5680 && standard_sse_mode_p (mode))
5681 return TARGET_SSE2 ? 2 : -1;
5686 /* Return the opcode of the special instruction to be used to load
5690 standard_sse_constant_opcode (rtx insn, rtx x)
5692 switch (standard_sse_constant_p (x))
5695 if (get_attr_mode (insn) == MODE_V4SF)
5696 return "xorps\t%0, %0";
5697 else if (get_attr_mode (insn) == MODE_V2DF)
5698 return "xorpd\t%0, %0";
5700 return "pxor\t%0, %0";
5702 return "pcmpeqd\t%0, %0";
5707 /* Returns 1 if OP contains a symbol reference */
5710 symbolic_reference_mentioned_p (rtx op)
5715 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5718 fmt = GET_RTX_FORMAT (GET_CODE (op));
5719 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5725 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5726 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5730 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5737 /* Return 1 if it is appropriate to emit `ret' instructions in the
5738 body of a function. Do this only if the epilogue is simple, needing a
5739 couple of insns. Prior to reloading, we can't tell how many registers
5740 must be saved, so return 0 then. Return 0 if there is no frame
5741 marker to de-allocate. */
5744 ix86_can_use_return_insn_p (void)
5746 struct ix86_frame frame;
5748 if (! reload_completed || frame_pointer_needed)
5751 /* Don't allow more than 32 pop, since that's all we can do
5752 with one instruction. */
5753 if (crtl->args.pops_args
5754 && crtl->args.size >= 32768)
5757 ix86_compute_frame_layout (&frame);
5758 return frame.to_allocate == 0 && frame.nregs == 0;
5761 /* Value should be nonzero if functions must have frame pointers.
5762 Zero means the frame pointer need not be set up (and parms may
5763 be accessed via the stack pointer) in functions that seem suitable. */
5766 ix86_frame_pointer_required (void)
5768 /* If we accessed previous frames, then the generated code expects
5769 to be able to access the saved ebp value in our frame. */
5770 if (cfun->machine->accesses_prev_frame)
5773 /* Several x86 os'es need a frame pointer for other reasons,
5774 usually pertaining to setjmp. */
5775 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5778 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5779 the frame pointer by default. Turn it back on now if we've not
5780 got a leaf function. */
5781 if (TARGET_OMIT_LEAF_FRAME_POINTER
5782 && (!current_function_is_leaf
5783 || ix86_current_function_calls_tls_descriptor))
5786 if (current_function_profile)
5792 /* Record that the current function accesses previous call frames. */
5795 ix86_setup_frame_addresses (void)
5797 cfun->machine->accesses_prev_frame = 1;
5800 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5801 # define USE_HIDDEN_LINKONCE 1
5803 # define USE_HIDDEN_LINKONCE 0
5806 static int pic_labels_used;
5808 /* Fills in the label name that should be used for a pc thunk for
5809 the given register. */
5812 get_pc_thunk_name (char name[32], unsigned int regno)
5814 gcc_assert (!TARGET_64BIT);
5816 if (USE_HIDDEN_LINKONCE)
5817 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5819 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5823 /* This function generates code for -fpic that loads %ebx with
5824 the return address of the caller and then returns. */
5827 ix86_file_end (void)
5832 for (regno = 0; regno < 8; ++regno)
5836 if (! ((pic_labels_used >> regno) & 1))
5839 get_pc_thunk_name (name, regno);
5844 switch_to_section (darwin_sections[text_coal_section]);
5845 fputs ("\t.weak_definition\t", asm_out_file);
5846 assemble_name (asm_out_file, name);
5847 fputs ("\n\t.private_extern\t", asm_out_file);
5848 assemble_name (asm_out_file, name);
5849 fputs ("\n", asm_out_file);
5850 ASM_OUTPUT_LABEL (asm_out_file, name);
5854 if (USE_HIDDEN_LINKONCE)
5858 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5860 TREE_PUBLIC (decl) = 1;
5861 TREE_STATIC (decl) = 1;
5862 DECL_ONE_ONLY (decl) = 1;
5864 (*targetm.asm_out.unique_section) (decl, 0);
5865 switch_to_section (get_named_section (decl, NULL, 0));
5867 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5868 fputs ("\t.hidden\t", asm_out_file);
5869 assemble_name (asm_out_file, name);
5870 fputc ('\n', asm_out_file);
5871 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5875 switch_to_section (text_section);
5876 ASM_OUTPUT_LABEL (asm_out_file, name);
5878 if (TARGET_64BIT_MS_ABI)
5880 xops[0] = gen_rtx_REG (Pmode, regno);
5881 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
5882 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
5883 output_asm_insn ("ret", xops);
5887 xops[0] = gen_rtx_REG (SImode, regno);
5888 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5889 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5890 output_asm_insn ("ret", xops);
5894 if (NEED_INDICATE_EXEC_STACK)
5895 file_end_indicate_exec_stack ();
5898 /* Emit code for the SET_GOT patterns. */
5901 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5907 if (TARGET_VXWORKS_RTP && flag_pic)
5909 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5910 xops[2] = gen_rtx_MEM (Pmode,
5911 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5912 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5914 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5915 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5916 an unadorned address. */
5917 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5918 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5919 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5923 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5925 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5927 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5930 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5932 output_asm_insn ("call\t%a2", xops);
5935 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5936 is what will be referenced by the Mach-O PIC subsystem. */
5938 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5941 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5942 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5945 output_asm_insn ("pop{l}\t%0", xops);
5950 get_pc_thunk_name (name, REGNO (dest));
5951 pic_labels_used |= 1 << REGNO (dest);
5953 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5954 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5955 output_asm_insn ("call\t%X2", xops);
5956 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5957 is what will be referenced by the Mach-O PIC subsystem. */
5960 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5962 targetm.asm_out.internal_label (asm_out_file, "L",
5963 CODE_LABEL_NUMBER (label));
5970 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5971 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5973 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5978 /* Generate an "push" pattern for input ARG. */
5983 return gen_rtx_SET (VOIDmode,
5985 gen_rtx_PRE_DEC (Pmode,
5986 stack_pointer_rtx)),
5990 /* Return >= 0 if there is an unused call-clobbered register available
5991 for the entire function. */
5994 ix86_select_alt_pic_regnum (void)
5996 if (current_function_is_leaf && !current_function_profile
5997 && !ix86_current_function_calls_tls_descriptor)
6000 for (i = 2; i >= 0; --i)
6001 if (!df_regs_ever_live_p (i))
6005 return INVALID_REGNUM;
6008 /* Return 1 if we need to save REGNO. */
6010 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6012 if (pic_offset_table_rtx
6013 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6014 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6015 || current_function_profile
6016 || current_function_calls_eh_return
6017 || current_function_uses_const_pool))
6019 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6024 if (current_function_calls_eh_return && maybe_eh_return)
6029 unsigned test = EH_RETURN_DATA_REGNO (i);
6030 if (test == INVALID_REGNUM)
6037 if (cfun->machine->force_align_arg_pointer
6038 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6041 return (df_regs_ever_live_p (regno)
6042 && !call_used_regs[regno]
6043 && !fixed_regs[regno]
6044 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6047 /* Return number of registers to be saved on the stack. */
6050 ix86_nsaved_regs (void)
6055 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6056 if (ix86_save_reg (regno, true))
6061 /* Return the offset between two registers, one to be eliminated, and the other
6062 its replacement, at the start of a routine. */
6065 ix86_initial_elimination_offset (int from, int to)
6067 struct ix86_frame frame;
6068 ix86_compute_frame_layout (&frame);
6070 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6071 return frame.hard_frame_pointer_offset;
6072 else if (from == FRAME_POINTER_REGNUM
6073 && to == HARD_FRAME_POINTER_REGNUM)
6074 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6077 gcc_assert (to == STACK_POINTER_REGNUM);
6079 if (from == ARG_POINTER_REGNUM)
6080 return frame.stack_pointer_offset;
6082 gcc_assert (from == FRAME_POINTER_REGNUM);
6083 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6087 /* Fill structure ix86_frame about frame of currently computed function. */
6090 ix86_compute_frame_layout (struct ix86_frame *frame)
6092 HOST_WIDE_INT total_size;
6093 unsigned int stack_alignment_needed;
6094 HOST_WIDE_INT offset;
6095 unsigned int preferred_alignment;
6096 HOST_WIDE_INT size = get_frame_size ();
6098 frame->nregs = ix86_nsaved_regs ();
6101 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
6102 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
6104 /* During reload iteration the amount of registers saved can change.
6105 Recompute the value as needed. Do not recompute when amount of registers
6106 didn't change as reload does multiple calls to the function and does not
6107 expect the decision to change within single iteration. */
6109 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6111 int count = frame->nregs;
6113 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6114 /* The fast prologue uses move instead of push to save registers. This
6115 is significantly longer, but also executes faster as modern hardware
6116 can execute the moves in parallel, but can't do that for push/pop.
6118 Be careful about choosing what prologue to emit: When function takes
6119 many instructions to execute we may use slow version as well as in
6120 case function is known to be outside hot spot (this is known with
6121 feedback only). Weight the size of function by number of registers
6122 to save as it is cheap to use one or two push instructions but very
6123 slow to use many of them. */
6125 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6126 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6127 || (flag_branch_probabilities
6128 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6129 cfun->machine->use_fast_prologue_epilogue = false;
6131 cfun->machine->use_fast_prologue_epilogue
6132 = !expensive_function_p (count);
6134 if (TARGET_PROLOGUE_USING_MOVE
6135 && cfun->machine->use_fast_prologue_epilogue)
6136 frame->save_regs_using_mov = true;
6138 frame->save_regs_using_mov = false;
6141 /* Skip return address and saved base pointer. */
6142 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6144 frame->hard_frame_pointer_offset = offset;
6146 /* Do some sanity checking of stack_alignment_needed and
6147 preferred_alignment, since i386 port is the only using those features
6148 that may break easily. */
6150 gcc_assert (!size || stack_alignment_needed);
6151 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6152 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6153 gcc_assert (stack_alignment_needed
6154 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6156 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6157 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6159 /* Register save area */
6160 offset += frame->nregs * UNITS_PER_WORD;
6163 if (ix86_save_varrargs_registers)
6165 offset += X86_64_VARARGS_SIZE;
6166 frame->va_arg_size = X86_64_VARARGS_SIZE;
6169 frame->va_arg_size = 0;
6171 /* Align start of frame for local function. */
6172 frame->padding1 = ((offset + stack_alignment_needed - 1)
6173 & -stack_alignment_needed) - offset;
6175 offset += frame->padding1;
6177 /* Frame pointer points here. */
6178 frame->frame_pointer_offset = offset;
6182 /* Add outgoing arguments area. Can be skipped if we eliminated
6183 all the function calls as dead code.
6184 Skipping is however impossible when function calls alloca. Alloca
6185 expander assumes that last crtl->outgoing_args_size
6186 of stack frame are unused. */
6187 if (ACCUMULATE_OUTGOING_ARGS
6188 && (!current_function_is_leaf || current_function_calls_alloca
6189 || ix86_current_function_calls_tls_descriptor))
6191 offset += crtl->outgoing_args_size;
6192 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6195 frame->outgoing_arguments_size = 0;
6197 /* Align stack boundary. Only needed if we're calling another function
6199 if (!current_function_is_leaf || current_function_calls_alloca
6200 || ix86_current_function_calls_tls_descriptor)
6201 frame->padding2 = ((offset + preferred_alignment - 1)
6202 & -preferred_alignment) - offset;
6204 frame->padding2 = 0;
6206 offset += frame->padding2;
6208 /* We've reached end of stack frame. */
6209 frame->stack_pointer_offset = offset;
6211 /* Size prologue needs to allocate. */
6212 frame->to_allocate =
6213 (size + frame->padding1 + frame->padding2
6214 + frame->outgoing_arguments_size + frame->va_arg_size);
6216 if ((!frame->to_allocate && frame->nregs <= 1)
6217 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6218 frame->save_regs_using_mov = false;
6220 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6221 && current_function_is_leaf
6222 && !ix86_current_function_calls_tls_descriptor)
6224 frame->red_zone_size = frame->to_allocate;
6225 if (frame->save_regs_using_mov)
6226 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6227 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6228 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6231 frame->red_zone_size = 0;
6232 frame->to_allocate -= frame->red_zone_size;
6233 frame->stack_pointer_offset -= frame->red_zone_size;
6235 fprintf (stderr, "\n");
6236 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6237 fprintf (stderr, "size: %ld\n", (long)size);
6238 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6239 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6240 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6241 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6242 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6243 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6244 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6245 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6246 (long)frame->hard_frame_pointer_offset);
6247 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6248 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6249 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
6250 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6254 /* Emit code to save registers in the prologue. */
6257 ix86_emit_save_regs (void)
6262 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6263 if (ix86_save_reg (regno, true))
6265 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6266 RTX_FRAME_RELATED_P (insn) = 1;
6270 /* Emit code to save registers using MOV insns. First register
6271 is restored from POINTER + OFFSET. */
6273 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6278 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6279 if (ix86_save_reg (regno, true))
6281 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6283 gen_rtx_REG (Pmode, regno));
6284 RTX_FRAME_RELATED_P (insn) = 1;
6285 offset += UNITS_PER_WORD;
6289 /* Expand prologue or epilogue stack adjustment.
6290 The pattern exist to put a dependency on all ebp-based memory accesses.
6291 STYLE should be negative if instructions should be marked as frame related,
6292 zero if %r11 register is live and cannot be freely used and positive
6296 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6301 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6302 else if (x86_64_immediate_operand (offset, DImode))
6303 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6307 /* r11 is used by indirect sibcall return as well, set before the
6308 epilogue and used after the epilogue. ATM indirect sibcall
6309 shouldn't be used together with huge frame sizes in one
6310 function because of the frame_size check in sibcall.c. */
6312 r11 = gen_rtx_REG (DImode, R11_REG);
6313 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6315 RTX_FRAME_RELATED_P (insn) = 1;
6316 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6320 RTX_FRAME_RELATED_P (insn) = 1;
6323 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6326 ix86_internal_arg_pointer (void)
6328 bool has_force_align_arg_pointer =
6329 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6330 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6331 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6332 && DECL_NAME (current_function_decl)
6333 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6334 && DECL_FILE_SCOPE_P (current_function_decl))
6335 || ix86_force_align_arg_pointer
6336 || has_force_align_arg_pointer)
6338 /* Nested functions can't realign the stack due to a register
6340 if (DECL_CONTEXT (current_function_decl)
6341 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6343 if (ix86_force_align_arg_pointer)
6344 warning (0, "-mstackrealign ignored for nested functions");
6345 if (has_force_align_arg_pointer)
6346 error ("%s not supported for nested functions",
6347 ix86_force_align_arg_pointer_string);
6348 return virtual_incoming_args_rtx;
6350 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6351 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6354 return virtual_incoming_args_rtx;
6357 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6358 This is called from dwarf2out.c to emit call frame instructions
6359 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6361 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6363 rtx unspec = SET_SRC (pattern);
6364 gcc_assert (GET_CODE (unspec) == UNSPEC);
6368 case UNSPEC_REG_SAVE:
6369 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6370 SET_DEST (pattern));
6372 case UNSPEC_DEF_CFA:
6373 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6374 INTVAL (XVECEXP (unspec, 0, 0)));
6381 /* Expand the prologue into a bunch of separate insns. */
6384 ix86_expand_prologue (void)
6388 struct ix86_frame frame;
6389 HOST_WIDE_INT allocate;
6391 ix86_compute_frame_layout (&frame);
6393 if (cfun->machine->force_align_arg_pointer)
6397 /* Grab the argument pointer. */
6398 x = plus_constant (stack_pointer_rtx, 4);
6399 y = cfun->machine->force_align_arg_pointer;
6400 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6401 RTX_FRAME_RELATED_P (insn) = 1;
6403 /* The unwind info consists of two parts: install the fafp as the cfa,
6404 and record the fafp as the "save register" of the stack pointer.
6405 The later is there in order that the unwinder can see where it
6406 should restore the stack pointer across the and insn. */
6407 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6408 x = gen_rtx_SET (VOIDmode, y, x);
6409 RTX_FRAME_RELATED_P (x) = 1;
6410 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6412 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6413 RTX_FRAME_RELATED_P (y) = 1;
6414 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6415 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6416 REG_NOTES (insn) = x;
6418 /* Align the stack. */
6419 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6422 /* And here we cheat like madmen with the unwind info. We force the
6423 cfa register back to sp+4, which is exactly what it was at the
6424 start of the function. Re-pushing the return address results in
6425 the return at the same spot relative to the cfa, and thus is
6426 correct wrt the unwind info. */
6427 x = cfun->machine->force_align_arg_pointer;
6428 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6429 insn = emit_insn (gen_push (x));
6430 RTX_FRAME_RELATED_P (insn) = 1;
6433 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6434 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6435 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6436 REG_NOTES (insn) = x;
6439 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6440 slower on all targets. Also sdb doesn't like it. */
6442 if (frame_pointer_needed)
6444 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6445 RTX_FRAME_RELATED_P (insn) = 1;
6447 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6448 RTX_FRAME_RELATED_P (insn) = 1;
6451 allocate = frame.to_allocate;
6453 if (!frame.save_regs_using_mov)
6454 ix86_emit_save_regs ();
6456 allocate += frame.nregs * UNITS_PER_WORD;
6458 /* When using red zone we may start register saving before allocating
6459 the stack frame saving one cycle of the prologue. However I will
6460 avoid doing this if I am going to have to probe the stack since
6461 at least on x86_64 the stack probe can turn into a call that clobbers
6462 a red zone location */
6463 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6464 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6465 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6466 : stack_pointer_rtx,
6467 -frame.nregs * UNITS_PER_WORD);
6471 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6472 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6473 GEN_INT (-allocate), -1);
6476 /* Only valid for Win32. */
6477 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6481 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6483 if (TARGET_64BIT_MS_ABI)
6486 eax_live = ix86_eax_live_at_start_p ();
6490 emit_insn (gen_push (eax));
6491 allocate -= UNITS_PER_WORD;
6494 emit_move_insn (eax, GEN_INT (allocate));
6497 insn = gen_allocate_stack_worker_64 (eax);
6499 insn = gen_allocate_stack_worker_32 (eax);
6500 insn = emit_insn (insn);
6501 RTX_FRAME_RELATED_P (insn) = 1;
6502 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6503 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6504 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6505 t, REG_NOTES (insn));
6509 if (frame_pointer_needed)
6510 t = plus_constant (hard_frame_pointer_rtx,
6513 - frame.nregs * UNITS_PER_WORD);
6515 t = plus_constant (stack_pointer_rtx, allocate);
6516 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6520 if (frame.save_regs_using_mov
6521 && !(TARGET_RED_ZONE
6522 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6524 if (!frame_pointer_needed || !frame.to_allocate)
6525 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6527 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6528 -frame.nregs * UNITS_PER_WORD);
6531 pic_reg_used = false;
6532 if (pic_offset_table_rtx
6533 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6534 || current_function_profile))
6536 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6538 if (alt_pic_reg_used != INVALID_REGNUM)
6539 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6541 pic_reg_used = true;
6548 if (ix86_cmodel == CM_LARGE_PIC)
6550 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6551 rtx label = gen_label_rtx ();
6553 LABEL_PRESERVE_P (label) = 1;
6554 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6555 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6556 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6557 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6558 pic_offset_table_rtx, tmp_reg));
6561 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6564 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6567 /* Prevent function calls from being scheduled before the call to mcount.
6568 In the pic_reg_used case, make sure that the got load isn't deleted. */
6569 if (current_function_profile)
6572 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6573 emit_insn (gen_blockage ());
6577 /* Emit code to restore saved registers using MOV insns. First register
6578 is restored from POINTER + OFFSET. */
6580 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6581 int maybe_eh_return)
6584 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6586 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6587 if (ix86_save_reg (regno, maybe_eh_return))
6589 /* Ensure that adjust_address won't be forced to produce pointer
6590 out of range allowed by x86-64 instruction set. */
6591 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6595 r11 = gen_rtx_REG (DImode, R11_REG);
6596 emit_move_insn (r11, GEN_INT (offset));
6597 emit_insn (gen_adddi3 (r11, r11, pointer));
6598 base_address = gen_rtx_MEM (Pmode, r11);
6601 emit_move_insn (gen_rtx_REG (Pmode, regno),
6602 adjust_address (base_address, Pmode, offset));
6603 offset += UNITS_PER_WORD;
6607 /* Restore function stack, frame, and registers. */
6610 ix86_expand_epilogue (int style)
6613 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6614 struct ix86_frame frame;
6615 HOST_WIDE_INT offset;
6617 ix86_compute_frame_layout (&frame);
6619 /* Calculate start of saved registers relative to ebp. Special care
6620 must be taken for the normal return case of a function using
6621 eh_return: the eax and edx registers are marked as saved, but not
6622 restored along this path. */
6623 offset = frame.nregs;
6624 if (current_function_calls_eh_return && style != 2)
6626 offset *= -UNITS_PER_WORD;
6628 /* If we're only restoring one register and sp is not valid then
6629 using a move instruction to restore the register since it's
6630 less work than reloading sp and popping the register.
6632 The default code result in stack adjustment using add/lea instruction,
6633 while this code results in LEAVE instruction (or discrete equivalent),
6634 so it is profitable in some other cases as well. Especially when there
6635 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6636 and there is exactly one register to pop. This heuristic may need some
6637 tuning in future. */
6638 if ((!sp_valid && frame.nregs <= 1)
6639 || (TARGET_EPILOGUE_USING_MOVE
6640 && cfun->machine->use_fast_prologue_epilogue
6641 && (frame.nregs > 1 || frame.to_allocate))
6642 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6643 || (frame_pointer_needed && TARGET_USE_LEAVE
6644 && cfun->machine->use_fast_prologue_epilogue
6645 && frame.nregs == 1)
6646 || current_function_calls_eh_return)
6648 /* Restore registers. We can use ebp or esp to address the memory
6649 locations. If both are available, default to ebp, since offsets
6650 are known to be small. Only exception is esp pointing directly to the
6651 end of block of saved registers, where we may simplify addressing
6654 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6655 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6656 frame.to_allocate, style == 2);
6658 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6659 offset, style == 2);
6661 /* eh_return epilogues need %ecx added to the stack pointer. */
6664 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6666 if (frame_pointer_needed)
6668 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6669 tmp = plus_constant (tmp, UNITS_PER_WORD);
6670 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6672 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6673 emit_move_insn (hard_frame_pointer_rtx, tmp);
6675 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6680 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6681 tmp = plus_constant (tmp, (frame.to_allocate
6682 + frame.nregs * UNITS_PER_WORD));
6683 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6686 else if (!frame_pointer_needed)
6687 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6688 GEN_INT (frame.to_allocate
6689 + frame.nregs * UNITS_PER_WORD),
6691 /* If not an i386, mov & pop is faster than "leave". */
6692 else if (TARGET_USE_LEAVE || optimize_size
6693 || !cfun->machine->use_fast_prologue_epilogue)
6694 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6697 pro_epilogue_adjust_stack (stack_pointer_rtx,
6698 hard_frame_pointer_rtx,
6701 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6703 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6708 /* First step is to deallocate the stack frame so that we can
6709 pop the registers. */
6712 gcc_assert (frame_pointer_needed);
6713 pro_epilogue_adjust_stack (stack_pointer_rtx,
6714 hard_frame_pointer_rtx,
6715 GEN_INT (offset), style);
6717 else if (frame.to_allocate)
6718 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6719 GEN_INT (frame.to_allocate), style);
6721 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6722 if (ix86_save_reg (regno, false))
6725 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6727 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6729 if (frame_pointer_needed)
6731 /* Leave results in shorter dependency chains on CPUs that are
6732 able to grok it fast. */
6733 if (TARGET_USE_LEAVE)
6734 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6735 else if (TARGET_64BIT)
6736 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6738 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6742 if (cfun->machine->force_align_arg_pointer)
6744 emit_insn (gen_addsi3 (stack_pointer_rtx,
6745 cfun->machine->force_align_arg_pointer,
6749 /* Sibcall epilogues don't want a return instruction. */
6753 if (crtl->args.pops_args && crtl->args.size)
6755 rtx popc = GEN_INT (crtl->args.pops_args);
6757 /* i386 can only pop 64K bytes. If asked to pop more, pop
6758 return address, do explicit add, and jump indirectly to the
6761 if (crtl->args.pops_args >= 65536)
6763 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6765 /* There is no "pascal" calling convention in any 64bit ABI. */
6766 gcc_assert (!TARGET_64BIT);
6768 emit_insn (gen_popsi1 (ecx));
6769 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6770 emit_jump_insn (gen_return_indirect_internal (ecx));
6773 emit_jump_insn (gen_return_pop_internal (popc));
6776 emit_jump_insn (gen_return_internal ());
6779 /* Reset from the function's potential modifications. */
6782 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6783 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6785 if (pic_offset_table_rtx)
6786 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6788 /* Mach-O doesn't support labels at the end of objects, so if
6789 it looks like we might want one, insert a NOP. */
6791 rtx insn = get_last_insn ();
6794 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6795 insn = PREV_INSN (insn);
6799 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6800 fputs ("\tnop\n", file);
6806 /* Extract the parts of an RTL expression that is a valid memory address
6807 for an instruction. Return 0 if the structure of the address is
6808 grossly off. Return -1 if the address contains ASHIFT, so it is not
6809 strictly valid, but still used for computing length of lea instruction. */
6812 ix86_decompose_address (rtx addr, struct ix86_address *out)
6814 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6815 rtx base_reg, index_reg;
6816 HOST_WIDE_INT scale = 1;
6817 rtx scale_rtx = NULL_RTX;
6819 enum ix86_address_seg seg = SEG_DEFAULT;
6821 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6823 else if (GET_CODE (addr) == PLUS)
6833 addends[n++] = XEXP (op, 1);
6836 while (GET_CODE (op) == PLUS);
6841 for (i = n; i >= 0; --i)
6844 switch (GET_CODE (op))
6849 index = XEXP (op, 0);
6850 scale_rtx = XEXP (op, 1);
6854 if (XINT (op, 1) == UNSPEC_TP
6855 && TARGET_TLS_DIRECT_SEG_REFS
6856 && seg == SEG_DEFAULT)
6857 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6886 else if (GET_CODE (addr) == MULT)
6888 index = XEXP (addr, 0); /* index*scale */
6889 scale_rtx = XEXP (addr, 1);
6891 else if (GET_CODE (addr) == ASHIFT)
6895 /* We're called for lea too, which implements ashift on occasion. */
6896 index = XEXP (addr, 0);
6897 tmp = XEXP (addr, 1);
6898 if (!CONST_INT_P (tmp))
6900 scale = INTVAL (tmp);
6901 if ((unsigned HOST_WIDE_INT) scale > 3)
6907 disp = addr; /* displacement */
6909 /* Extract the integral value of scale. */
6912 if (!CONST_INT_P (scale_rtx))
6914 scale = INTVAL (scale_rtx);
6917 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6918 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6920 /* Allow arg pointer and stack pointer as index if there is not scaling. */
6921 if (base_reg && index_reg && scale == 1
6922 && (index_reg == arg_pointer_rtx
6923 || index_reg == frame_pointer_rtx
6924 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6927 tmp = base, base = index, index = tmp;
6928 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6931 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6932 if ((base_reg == hard_frame_pointer_rtx
6933 || base_reg == frame_pointer_rtx
6934 || base_reg == arg_pointer_rtx) && !disp)
6937 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6938 Avoid this by transforming to [%esi+0]. */
6939 if (TARGET_K6 && !optimize_size
6940 && base_reg && !index_reg && !disp
6942 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6945 /* Special case: encode reg+reg instead of reg*2. */
6946 if (!base && index && scale && scale == 2)
6947 base = index, base_reg = index_reg, scale = 1;
6949 /* Special case: scaling cannot be encoded without base or displacement. */
6950 if (!base && !disp && index && scale != 1)
6962 /* Return cost of the memory address x.
6963 For i386, it is better to use a complex address than let gcc copy
6964 the address into a reg and make a new pseudo. But not if the address
6965 requires to two regs - that would mean more pseudos with longer
6968 ix86_address_cost (rtx x)
6970 struct ix86_address parts;
6972 int ok = ix86_decompose_address (x, &parts);
6976 if (parts.base && GET_CODE (parts.base) == SUBREG)
6977 parts.base = SUBREG_REG (parts.base);
6978 if (parts.index && GET_CODE (parts.index) == SUBREG)
6979 parts.index = SUBREG_REG (parts.index);
6981 /* Attempt to minimize number of registers in the address. */
6983 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6985 && (!REG_P (parts.index)
6986 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6990 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6992 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6993 && parts.base != parts.index)
6996 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
6997 since it's predecode logic can't detect the length of instructions
6998 and it degenerates to vector decoded. Increase cost of such
6999 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7000 to split such addresses or even refuse such addresses at all.
7002 Following addressing modes are affected:
7007 The first and last case may be avoidable by explicitly coding the zero in
7008 memory address, but I don't have AMD-K6 machine handy to check this
7012 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7013 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7014 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7020 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7021 this is used for to form addresses to local data when -fPIC is in
7025 darwin_local_data_pic (rtx disp)
7027 if (GET_CODE (disp) == MINUS)
7029 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7030 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7031 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7033 const char *sym_name = XSTR (XEXP (disp, 1), 0);
7034 if (! strcmp (sym_name, "<pic base>"))
7042 /* Determine if a given RTX is a valid constant. We already know this
7043 satisfies CONSTANT_P. */
7046 legitimate_constant_p (rtx x)
7048 switch (GET_CODE (x))
7053 if (GET_CODE (x) == PLUS)
7055 if (!CONST_INT_P (XEXP (x, 1)))
7060 if (TARGET_MACHO && darwin_local_data_pic (x))
7063 /* Only some unspecs are valid as "constants". */
7064 if (GET_CODE (x) == UNSPEC)
7065 switch (XINT (x, 1))
7070 return TARGET_64BIT;
7073 x = XVECEXP (x, 0, 0);
7074 return (GET_CODE (x) == SYMBOL_REF
7075 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7077 x = XVECEXP (x, 0, 0);
7078 return (GET_CODE (x) == SYMBOL_REF
7079 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7084 /* We must have drilled down to a symbol. */
7085 if (GET_CODE (x) == LABEL_REF)
7087 if (GET_CODE (x) != SYMBOL_REF)
7092 /* TLS symbols are never valid. */
7093 if (SYMBOL_REF_TLS_MODEL (x))
7096 /* DLLIMPORT symbols are never valid. */
7097 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7098 && SYMBOL_REF_DLLIMPORT_P (x))
7103 if (GET_MODE (x) == TImode
7104 && x != CONST0_RTX (TImode)
7110 if (x == CONST0_RTX (GET_MODE (x)))
7118 /* Otherwise we handle everything else in the move patterns. */
7122 /* Determine if it's legal to put X into the constant pool. This
7123 is not possible for the address of thread-local symbols, which
7124 is checked above. */
7127 ix86_cannot_force_const_mem (rtx x)
7129 /* We can always put integral constants and vectors in memory. */
7130 switch (GET_CODE (x))
7140 return !legitimate_constant_p (x);
7143 /* Determine if a given RTX is a valid constant address. */
7146 constant_address_p (rtx x)
7148 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7151 /* Nonzero if the constant value X is a legitimate general operand
7152 when generating PIC code. It is given that flag_pic is on and
7153 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7156 legitimate_pic_operand_p (rtx x)
7160 switch (GET_CODE (x))
7163 inner = XEXP (x, 0);
7164 if (GET_CODE (inner) == PLUS
7165 && CONST_INT_P (XEXP (inner, 1)))
7166 inner = XEXP (inner, 0);
7168 /* Only some unspecs are valid as "constants". */
7169 if (GET_CODE (inner) == UNSPEC)
7170 switch (XINT (inner, 1))
7175 return TARGET_64BIT;
7177 x = XVECEXP (inner, 0, 0);
7178 return (GET_CODE (x) == SYMBOL_REF
7179 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7187 return legitimate_pic_address_disp_p (x);
7194 /* Determine if a given CONST RTX is a valid memory displacement
7198 legitimate_pic_address_disp_p (rtx disp)
7202 /* In 64bit mode we can allow direct addresses of symbols and labels
7203 when they are not dynamic symbols. */
7206 rtx op0 = disp, op1;
7208 switch (GET_CODE (disp))
7214 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7216 op0 = XEXP (XEXP (disp, 0), 0);
7217 op1 = XEXP (XEXP (disp, 0), 1);
7218 if (!CONST_INT_P (op1)
7219 || INTVAL (op1) >= 16*1024*1024
7220 || INTVAL (op1) < -16*1024*1024)
7222 if (GET_CODE (op0) == LABEL_REF)
7224 if (GET_CODE (op0) != SYMBOL_REF)
7229 /* TLS references should always be enclosed in UNSPEC. */
7230 if (SYMBOL_REF_TLS_MODEL (op0))
7232 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7233 && ix86_cmodel != CM_LARGE_PIC)
7241 if (GET_CODE (disp) != CONST)
7243 disp = XEXP (disp, 0);
7247 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7248 of GOT tables. We should not need these anyway. */
7249 if (GET_CODE (disp) != UNSPEC
7250 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7251 && XINT (disp, 1) != UNSPEC_GOTOFF
7252 && XINT (disp, 1) != UNSPEC_PLTOFF))
7255 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7256 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
7262 if (GET_CODE (disp) == PLUS)
7264 if (!CONST_INT_P (XEXP (disp, 1)))
7266 disp = XEXP (disp, 0);
7270 if (TARGET_MACHO && darwin_local_data_pic (disp))
7273 if (GET_CODE (disp) != UNSPEC)
7276 switch (XINT (disp, 1))
7281 /* We need to check for both symbols and labels because VxWorks loads
7282 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7284 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7285 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7287 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7288 While ABI specify also 32bit relocation but we don't produce it in
7289 small PIC model at all. */
7290 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7291 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7293 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7295 case UNSPEC_GOTTPOFF:
7296 case UNSPEC_GOTNTPOFF:
7297 case UNSPEC_INDNTPOFF:
7300 disp = XVECEXP (disp, 0, 0);
7301 return (GET_CODE (disp) == SYMBOL_REF
7302 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7304 disp = XVECEXP (disp, 0, 0);
7305 return (GET_CODE (disp) == SYMBOL_REF
7306 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7308 disp = XVECEXP (disp, 0, 0);
7309 return (GET_CODE (disp) == SYMBOL_REF
7310 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7316 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7317 memory address for an instruction. The MODE argument is the machine mode
7318 for the MEM expression that wants to use this address.
7320 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7321 convert common non-canonical forms to canonical form so that they will
7325 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7326 rtx addr, int strict)
7328 struct ix86_address parts;
7329 rtx base, index, disp;
7330 HOST_WIDE_INT scale;
7331 const char *reason = NULL;
7332 rtx reason_rtx = NULL_RTX;
7334 if (ix86_decompose_address (addr, &parts) <= 0)
7336 reason = "decomposition failed";
7341 index = parts.index;
7343 scale = parts.scale;
7345 /* Validate base register.
7347 Don't allow SUBREG's that span more than a word here. It can lead to spill
7348 failures when the base is one word out of a two word structure, which is
7349 represented internally as a DImode int. */
7358 else if (GET_CODE (base) == SUBREG
7359 && REG_P (SUBREG_REG (base))
7360 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7362 reg = SUBREG_REG (base);
7365 reason = "base is not a register";
7369 if (GET_MODE (base) != Pmode)
7371 reason = "base is not in Pmode";
7375 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7376 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7378 reason = "base is not valid";
7383 /* Validate index register.
7385 Don't allow SUBREG's that span more than a word here -- same as above. */
7394 else if (GET_CODE (index) == SUBREG
7395 && REG_P (SUBREG_REG (index))
7396 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7398 reg = SUBREG_REG (index);
7401 reason = "index is not a register";
7405 if (GET_MODE (index) != Pmode)
7407 reason = "index is not in Pmode";
7411 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7412 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7414 reason = "index is not valid";
7419 /* Validate scale factor. */
7422 reason_rtx = GEN_INT (scale);
7425 reason = "scale without index";
7429 if (scale != 2 && scale != 4 && scale != 8)
7431 reason = "scale is not a valid multiplier";
7436 /* Validate displacement. */
7441 if (GET_CODE (disp) == CONST
7442 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7443 switch (XINT (XEXP (disp, 0), 1))
7445 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7446 used. While ABI specify also 32bit relocations, we don't produce
7447 them at all and use IP relative instead. */
7450 gcc_assert (flag_pic);
7452 goto is_legitimate_pic;
7453 reason = "64bit address unspec";
7456 case UNSPEC_GOTPCREL:
7457 gcc_assert (flag_pic);
7458 goto is_legitimate_pic;
7460 case UNSPEC_GOTTPOFF:
7461 case UNSPEC_GOTNTPOFF:
7462 case UNSPEC_INDNTPOFF:
7468 reason = "invalid address unspec";
7472 else if (SYMBOLIC_CONST (disp)
7476 && MACHOPIC_INDIRECT
7477 && !machopic_operand_p (disp)
7483 if (TARGET_64BIT && (index || base))
7485 /* foo@dtpoff(%rX) is ok. */
7486 if (GET_CODE (disp) != CONST
7487 || GET_CODE (XEXP (disp, 0)) != PLUS
7488 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7489 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7490 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7491 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7493 reason = "non-constant pic memory reference";
7497 else if (! legitimate_pic_address_disp_p (disp))
7499 reason = "displacement is an invalid pic construct";
7503 /* This code used to verify that a symbolic pic displacement
7504 includes the pic_offset_table_rtx register.
7506 While this is good idea, unfortunately these constructs may
7507 be created by "adds using lea" optimization for incorrect
7516 This code is nonsensical, but results in addressing
7517 GOT table with pic_offset_table_rtx base. We can't
7518 just refuse it easily, since it gets matched by
7519 "addsi3" pattern, that later gets split to lea in the
7520 case output register differs from input. While this
7521 can be handled by separate addsi pattern for this case
7522 that never results in lea, this seems to be easier and
7523 correct fix for crash to disable this test. */
7525 else if (GET_CODE (disp) != LABEL_REF
7526 && !CONST_INT_P (disp)
7527 && (GET_CODE (disp) != CONST
7528 || !legitimate_constant_p (disp))
7529 && (GET_CODE (disp) != SYMBOL_REF
7530 || !legitimate_constant_p (disp)))
7532 reason = "displacement is not constant";
7535 else if (TARGET_64BIT
7536 && !x86_64_immediate_operand (disp, VOIDmode))
7538 reason = "displacement is out of range";
7543 /* Everything looks valid. */
7550 /* Return a unique alias set for the GOT. */
7552 static alias_set_type
7553 ix86_GOT_alias_set (void)
7555 static alias_set_type set = -1;
7557 set = new_alias_set ();
7561 /* Return a legitimate reference for ORIG (an address) using the
7562 register REG. If REG is 0, a new pseudo is generated.
7564 There are two types of references that must be handled:
7566 1. Global data references must load the address from the GOT, via
7567 the PIC reg. An insn is emitted to do this load, and the reg is
7570 2. Static data references, constant pool addresses, and code labels
7571 compute the address as an offset from the GOT, whose base is in
7572 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7573 differentiate them from global data objects. The returned
7574 address is the PIC reg + an unspec constant.
7576 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7577 reg also appears in the address. */
7580 legitimize_pic_address (rtx orig, rtx reg)
7587 if (TARGET_MACHO && !TARGET_64BIT)
7590 reg = gen_reg_rtx (Pmode);
7591 /* Use the generic Mach-O PIC machinery. */
7592 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7596 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7598 else if (TARGET_64BIT
7599 && ix86_cmodel != CM_SMALL_PIC
7600 && gotoff_operand (addr, Pmode))
7603 /* This symbol may be referenced via a displacement from the PIC
7604 base address (@GOTOFF). */
7606 if (reload_in_progress)
7607 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7608 if (GET_CODE (addr) == CONST)
7609 addr = XEXP (addr, 0);
7610 if (GET_CODE (addr) == PLUS)
7612 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7614 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7617 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7618 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7620 tmpreg = gen_reg_rtx (Pmode);
7623 emit_move_insn (tmpreg, new_rtx);
7627 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7628 tmpreg, 1, OPTAB_DIRECT);
7631 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7633 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7635 /* This symbol may be referenced via a displacement from the PIC
7636 base address (@GOTOFF). */
7638 if (reload_in_progress)
7639 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7640 if (GET_CODE (addr) == CONST)
7641 addr = XEXP (addr, 0);
7642 if (GET_CODE (addr) == PLUS)
7644 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7646 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7649 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7650 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7651 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7655 emit_move_insn (reg, new_rtx);
7659 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7660 /* We can't use @GOTOFF for text labels on VxWorks;
7661 see gotoff_operand. */
7662 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7664 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7666 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7667 return legitimize_dllimport_symbol (addr, true);
7668 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7669 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7670 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7672 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7673 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
7677 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7680 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7681 new_rtx = gen_const_mem (Pmode, new_rtx);
7682 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7685 reg = gen_reg_rtx (Pmode);
7686 /* Use directly gen_movsi, otherwise the address is loaded
7687 into register for CSE. We don't want to CSE this addresses,
7688 instead we CSE addresses from the GOT table, so skip this. */
7689 emit_insn (gen_movsi (reg, new_rtx));
7694 /* This symbol must be referenced via a load from the
7695 Global Offset Table (@GOT). */
7697 if (reload_in_progress)
7698 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7699 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7700 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7702 new_rtx = force_reg (Pmode, new_rtx);
7703 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7704 new_rtx = gen_const_mem (Pmode, new_rtx);
7705 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7708 reg = gen_reg_rtx (Pmode);
7709 emit_move_insn (reg, new_rtx);
7715 if (CONST_INT_P (addr)
7716 && !x86_64_immediate_operand (addr, VOIDmode))
7720 emit_move_insn (reg, addr);
7724 new_rtx = force_reg (Pmode, addr);
7726 else if (GET_CODE (addr) == CONST)
7728 addr = XEXP (addr, 0);
7730 /* We must match stuff we generate before. Assume the only
7731 unspecs that can get here are ours. Not that we could do
7732 anything with them anyway.... */
7733 if (GET_CODE (addr) == UNSPEC
7734 || (GET_CODE (addr) == PLUS
7735 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7737 gcc_assert (GET_CODE (addr) == PLUS);
7739 if (GET_CODE (addr) == PLUS)
7741 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7743 /* Check first to see if this is a constant offset from a @GOTOFF
7744 symbol reference. */
7745 if (gotoff_operand (op0, Pmode)
7746 && CONST_INT_P (op1))
7750 if (reload_in_progress)
7751 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7752 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7754 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7755 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7756 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7760 emit_move_insn (reg, new_rtx);
7766 if (INTVAL (op1) < -16*1024*1024
7767 || INTVAL (op1) >= 16*1024*1024)
7769 if (!x86_64_immediate_operand (op1, Pmode))
7770 op1 = force_reg (Pmode, op1);
7771 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7777 base = legitimize_pic_address (XEXP (addr, 0), reg);
7778 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7779 base == reg ? NULL_RTX : reg);
7781 if (CONST_INT_P (new_rtx))
7782 new_rtx = plus_constant (base, INTVAL (new_rtx));
7785 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7787 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7788 new_rtx = XEXP (new_rtx, 1);
7790 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7798 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7801 get_thread_pointer (int to_reg)
7805 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7809 reg = gen_reg_rtx (Pmode);
7810 insn = gen_rtx_SET (VOIDmode, reg, tp);
7811 insn = emit_insn (insn);
7816 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7817 false if we expect this to be used for a memory address and true if
7818 we expect to load the address into a register. */
7821 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7823 rtx dest, base, off, pic, tp;
7828 case TLS_MODEL_GLOBAL_DYNAMIC:
7829 dest = gen_reg_rtx (Pmode);
7830 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7832 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7834 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
7837 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7838 insns = get_insns ();
7841 CONST_OR_PURE_CALL_P (insns) = 1;
7842 emit_libcall_block (insns, dest, rax, x);
7844 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7845 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7847 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7849 if (TARGET_GNU2_TLS)
7851 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7853 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7857 case TLS_MODEL_LOCAL_DYNAMIC:
7858 base = gen_reg_rtx (Pmode);
7859 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7861 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7863 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
7866 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7867 insns = get_insns ();
7870 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7871 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7872 CONST_OR_PURE_CALL_P (insns) = 1;
7873 emit_libcall_block (insns, base, rax, note);
7875 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7876 emit_insn (gen_tls_local_dynamic_base_64 (base));
7878 emit_insn (gen_tls_local_dynamic_base_32 (base));
7880 if (TARGET_GNU2_TLS)
7882 rtx x = ix86_tls_module_base ();
7884 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7885 gen_rtx_MINUS (Pmode, x, tp));
7888 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7889 off = gen_rtx_CONST (Pmode, off);
7891 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7893 if (TARGET_GNU2_TLS)
7895 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7897 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7902 case TLS_MODEL_INITIAL_EXEC:
7906 type = UNSPEC_GOTNTPOFF;
7910 if (reload_in_progress)
7911 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7912 pic = pic_offset_table_rtx;
7913 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7915 else if (!TARGET_ANY_GNU_TLS)
7917 pic = gen_reg_rtx (Pmode);
7918 emit_insn (gen_set_got (pic));
7919 type = UNSPEC_GOTTPOFF;
7924 type = UNSPEC_INDNTPOFF;
7927 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7928 off = gen_rtx_CONST (Pmode, off);
7930 off = gen_rtx_PLUS (Pmode, pic, off);
7931 off = gen_const_mem (Pmode, off);
7932 set_mem_alias_set (off, ix86_GOT_alias_set ());
7934 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7936 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7937 off = force_reg (Pmode, off);
7938 return gen_rtx_PLUS (Pmode, base, off);
7942 base = get_thread_pointer (true);
7943 dest = gen_reg_rtx (Pmode);
7944 emit_insn (gen_subsi3 (dest, base, off));
7948 case TLS_MODEL_LOCAL_EXEC:
7949 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7950 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7951 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7952 off = gen_rtx_CONST (Pmode, off);
7954 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7956 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7957 return gen_rtx_PLUS (Pmode, base, off);
7961 base = get_thread_pointer (true);
7962 dest = gen_reg_rtx (Pmode);
7963 emit_insn (gen_subsi3 (dest, base, off));
7974 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7977 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7978 htab_t dllimport_map;
7981 get_dllimport_decl (tree decl)
7983 struct tree_map *h, in;
7987 size_t namelen, prefixlen;
7993 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7995 in.hash = htab_hash_pointer (decl);
7996 in.base.from = decl;
7997 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7998 h = (struct tree_map *) *loc;
8002 *loc = h = GGC_NEW (struct tree_map);
8004 h->base.from = decl;
8005 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8006 DECL_ARTIFICIAL (to) = 1;
8007 DECL_IGNORED_P (to) = 1;
8008 DECL_EXTERNAL (to) = 1;
8009 TREE_READONLY (to) = 1;
8011 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8012 name = targetm.strip_name_encoding (name);
8013 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8014 namelen = strlen (name);
8015 prefixlen = strlen (prefix);
8016 imp_name = (char *) alloca (namelen + prefixlen + 1);
8017 memcpy (imp_name, prefix, prefixlen);
8018 memcpy (imp_name + prefixlen, name, namelen + 1);
8020 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8021 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8022 SET_SYMBOL_REF_DECL (rtl, to);
8023 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8025 rtl = gen_const_mem (Pmode, rtl);
8026 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8028 SET_DECL_RTL (to, rtl);
8029 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8034 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8035 true if we require the result be a register. */
8038 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8043 gcc_assert (SYMBOL_REF_DECL (symbol));
8044 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8046 x = DECL_RTL (imp_decl);
8048 x = force_reg (Pmode, x);
8052 /* Try machine-dependent ways of modifying an illegitimate address
8053 to be legitimate. If we find one, return the new, valid address.
8054 This macro is used in only one place: `memory_address' in explow.c.
8056 OLDX is the address as it was before break_out_memory_refs was called.
8057 In some cases it is useful to look at this to decide what needs to be done.
8059 MODE and WIN are passed so that this macro can use
8060 GO_IF_LEGITIMATE_ADDRESS.
8062 It is always safe for this macro to do nothing. It exists to recognize
8063 opportunities to optimize the output.
8065 For the 80386, we handle X+REG by loading X into a register R and
8066 using R+REG. R will go in a general reg and indexing will be used.
8067 However, if REG is a broken-out memory address or multiplication,
8068 nothing needs to be done because REG can certainly go in a general reg.
8070 When -fpic is used, special handling is needed for symbolic references.
8071 See comments by legitimize_pic_address in i386.c for details. */
8074 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
8079 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8081 return legitimize_tls_address (x, (enum tls_model) log, false);
8082 if (GET_CODE (x) == CONST
8083 && GET_CODE (XEXP (x, 0)) == PLUS
8084 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8085 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8087 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8088 (enum tls_model) log, false);
8089 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8092 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8094 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8095 return legitimize_dllimport_symbol (x, true);
8096 if (GET_CODE (x) == CONST
8097 && GET_CODE (XEXP (x, 0)) == PLUS
8098 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8099 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8101 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8102 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8106 if (flag_pic && SYMBOLIC_CONST (x))
8107 return legitimize_pic_address (x, 0);
8109 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8110 if (GET_CODE (x) == ASHIFT
8111 && CONST_INT_P (XEXP (x, 1))
8112 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8115 log = INTVAL (XEXP (x, 1));
8116 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8117 GEN_INT (1 << log));
8120 if (GET_CODE (x) == PLUS)
8122 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8124 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8125 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8126 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8129 log = INTVAL (XEXP (XEXP (x, 0), 1));
8130 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8131 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8132 GEN_INT (1 << log));
8135 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8136 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8137 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8140 log = INTVAL (XEXP (XEXP (x, 1), 1));
8141 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8142 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8143 GEN_INT (1 << log));
8146 /* Put multiply first if it isn't already. */
8147 if (GET_CODE (XEXP (x, 1)) == MULT)
8149 rtx tmp = XEXP (x, 0);
8150 XEXP (x, 0) = XEXP (x, 1);
8155 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8156 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8157 created by virtual register instantiation, register elimination, and
8158 similar optimizations. */
8159 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8162 x = gen_rtx_PLUS (Pmode,
8163 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8164 XEXP (XEXP (x, 1), 0)),
8165 XEXP (XEXP (x, 1), 1));
8169 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8170 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8171 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8173 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8174 && CONSTANT_P (XEXP (x, 1)))
8177 rtx other = NULL_RTX;
8179 if (CONST_INT_P (XEXP (x, 1)))
8181 constant = XEXP (x, 1);
8182 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8184 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8186 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8187 other = XEXP (x, 1);
8195 x = gen_rtx_PLUS (Pmode,
8196 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8197 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8198 plus_constant (other, INTVAL (constant)));
8202 if (changed && legitimate_address_p (mode, x, FALSE))
8205 if (GET_CODE (XEXP (x, 0)) == MULT)
8208 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8211 if (GET_CODE (XEXP (x, 1)) == MULT)
8214 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8218 && REG_P (XEXP (x, 1))
8219 && REG_P (XEXP (x, 0)))
8222 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8225 x = legitimize_pic_address (x, 0);
8228 if (changed && legitimate_address_p (mode, x, FALSE))
8231 if (REG_P (XEXP (x, 0)))
8233 rtx temp = gen_reg_rtx (Pmode);
8234 rtx val = force_operand (XEXP (x, 1), temp);
8236 emit_move_insn (temp, val);
8242 else if (REG_P (XEXP (x, 1)))
8244 rtx temp = gen_reg_rtx (Pmode);
8245 rtx val = force_operand (XEXP (x, 0), temp);
8247 emit_move_insn (temp, val);
8257 /* Print an integer constant expression in assembler syntax. Addition
8258 and subtraction are the only arithmetic that may appear in these
8259 expressions. FILE is the stdio stream to write to, X is the rtx, and
8260 CODE is the operand print code from the output string. */
8263 output_pic_addr_const (FILE *file, rtx x, int code)
8267 switch (GET_CODE (x))
8270 gcc_assert (flag_pic);
8275 if (! TARGET_MACHO || TARGET_64BIT)
8276 output_addr_const (file, x);
8279 const char *name = XSTR (x, 0);
8281 /* Mark the decl as referenced so that cgraph will
8282 output the function. */
8283 if (SYMBOL_REF_DECL (x))
8284 mark_decl_referenced (SYMBOL_REF_DECL (x));
8287 if (MACHOPIC_INDIRECT
8288 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8289 name = machopic_indirection_name (x, /*stub_p=*/true);
8291 assemble_name (file, name);
8293 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
8294 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8295 fputs ("@PLT", file);
8302 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8303 assemble_name (asm_out_file, buf);
8307 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8311 /* This used to output parentheses around the expression,
8312 but that does not work on the 386 (either ATT or BSD assembler). */
8313 output_pic_addr_const (file, XEXP (x, 0), code);
8317 if (GET_MODE (x) == VOIDmode)
8319 /* We can use %d if the number is <32 bits and positive. */
8320 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8321 fprintf (file, "0x%lx%08lx",
8322 (unsigned long) CONST_DOUBLE_HIGH (x),
8323 (unsigned long) CONST_DOUBLE_LOW (x));
8325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8328 /* We can't handle floating point constants;
8329 PRINT_OPERAND must handle them. */
8330 output_operand_lossage ("floating constant misused");
8334 /* Some assemblers need integer constants to appear first. */
8335 if (CONST_INT_P (XEXP (x, 0)))
8337 output_pic_addr_const (file, XEXP (x, 0), code);
8339 output_pic_addr_const (file, XEXP (x, 1), code);
8343 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8344 output_pic_addr_const (file, XEXP (x, 1), code);
8346 output_pic_addr_const (file, XEXP (x, 0), code);
8352 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8353 output_pic_addr_const (file, XEXP (x, 0), code);
8355 output_pic_addr_const (file, XEXP (x, 1), code);
8357 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8361 gcc_assert (XVECLEN (x, 0) == 1);
8362 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8363 switch (XINT (x, 1))
8366 fputs ("@GOT", file);
8369 fputs ("@GOTOFF", file);
8372 fputs ("@PLTOFF", file);
8374 case UNSPEC_GOTPCREL:
8375 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8376 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8378 case UNSPEC_GOTTPOFF:
8379 /* FIXME: This might be @TPOFF in Sun ld too. */
8380 fputs ("@GOTTPOFF", file);
8383 fputs ("@TPOFF", file);
8387 fputs ("@TPOFF", file);
8389 fputs ("@NTPOFF", file);
8392 fputs ("@DTPOFF", file);
8394 case UNSPEC_GOTNTPOFF:
8396 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8397 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8399 fputs ("@GOTNTPOFF", file);
8401 case UNSPEC_INDNTPOFF:
8402 fputs ("@INDNTPOFF", file);
8405 output_operand_lossage ("invalid UNSPEC as operand");
8411 output_operand_lossage ("invalid expression as operand");
8415 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8416 We need to emit DTP-relative relocations. */
8418 static void ATTRIBUTE_UNUSED
8419 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8421 fputs (ASM_LONG, file);
8422 output_addr_const (file, x);
8423 fputs ("@DTPOFF", file);
8429 fputs (", 0", file);
8436 /* In the name of slightly smaller debug output, and to cater to
8437 general assembler lossage, recognize PIC+GOTOFF and turn it back
8438 into a direct symbol reference.
8440 On Darwin, this is necessary to avoid a crash, because Darwin
8441 has a different PIC label for each routine but the DWARF debugging
8442 information is not associated with any particular routine, so it's
8443 necessary to remove references to the PIC label from RTL stored by
8444 the DWARF output code. */
8447 ix86_delegitimize_address (rtx orig_x)
8450 /* reg_addend is NULL or a multiple of some register. */
8451 rtx reg_addend = NULL_RTX;
8452 /* const_addend is NULL or a const_int. */
8453 rtx const_addend = NULL_RTX;
8454 /* This is the result, or NULL. */
8455 rtx result = NULL_RTX;
8462 if (GET_CODE (x) != CONST
8463 || GET_CODE (XEXP (x, 0)) != UNSPEC
8464 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8467 return XVECEXP (XEXP (x, 0), 0, 0);
8470 if (GET_CODE (x) != PLUS
8471 || GET_CODE (XEXP (x, 1)) != CONST)
8474 if (REG_P (XEXP (x, 0))
8475 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8476 /* %ebx + GOT/GOTOFF */
8478 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8480 /* %ebx + %reg * scale + GOT/GOTOFF */
8481 reg_addend = XEXP (x, 0);
8482 if (REG_P (XEXP (reg_addend, 0))
8483 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8484 reg_addend = XEXP (reg_addend, 1);
8485 else if (REG_P (XEXP (reg_addend, 1))
8486 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8487 reg_addend = XEXP (reg_addend, 0);
8490 if (!REG_P (reg_addend)
8491 && GET_CODE (reg_addend) != MULT
8492 && GET_CODE (reg_addend) != ASHIFT)
8498 x = XEXP (XEXP (x, 1), 0);
8499 if (GET_CODE (x) == PLUS
8500 && CONST_INT_P (XEXP (x, 1)))
8502 const_addend = XEXP (x, 1);
8506 if (GET_CODE (x) == UNSPEC
8507 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8508 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8509 result = XVECEXP (x, 0, 0);
8511 if (TARGET_MACHO && darwin_local_data_pic (x)
8513 result = XEXP (x, 0);
8519 result = gen_rtx_PLUS (Pmode, result, const_addend);
8521 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8525 /* If X is a machine specific address (i.e. a symbol or label being
8526 referenced as a displacement from the GOT implemented using an
8527 UNSPEC), then return the base term. Otherwise return X. */
8530 ix86_find_base_term (rtx x)
8536 if (GET_CODE (x) != CONST)
8539 if (GET_CODE (term) == PLUS
8540 && (CONST_INT_P (XEXP (term, 1))
8541 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8542 term = XEXP (term, 0);
8543 if (GET_CODE (term) != UNSPEC
8544 || XINT (term, 1) != UNSPEC_GOTPCREL)
8547 term = XVECEXP (term, 0, 0);
8549 if (GET_CODE (term) != SYMBOL_REF
8550 && GET_CODE (term) != LABEL_REF)
8556 term = ix86_delegitimize_address (x);
8558 if (GET_CODE (term) != SYMBOL_REF
8559 && GET_CODE (term) != LABEL_REF)
8566 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8571 if (mode == CCFPmode || mode == CCFPUmode)
8573 enum rtx_code second_code, bypass_code;
8574 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8575 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8576 code = ix86_fp_compare_code_to_integer (code);
8580 code = reverse_condition (code);
8631 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8635 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8636 Those same assemblers have the same but opposite lossage on cmov. */
8638 suffix = fp ? "nbe" : "a";
8639 else if (mode == CCCmode)
8662 gcc_assert (mode == CCmode || mode == CCCmode);
8684 gcc_assert (mode == CCmode || mode == CCCmode);
8685 suffix = fp ? "nb" : "ae";
8688 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8695 else if (mode == CCCmode)
8696 suffix = fp ? "nb" : "ae";
8701 suffix = fp ? "u" : "p";
8704 suffix = fp ? "nu" : "np";
8709 fputs (suffix, file);
8712 /* Print the name of register X to FILE based on its machine mode and number.
8713 If CODE is 'w', pretend the mode is HImode.
8714 If CODE is 'b', pretend the mode is QImode.
8715 If CODE is 'k', pretend the mode is SImode.
8716 If CODE is 'q', pretend the mode is DImode.
8717 If CODE is 'h', pretend the reg is the 'high' byte register.
8718 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8721 print_reg (rtx x, int code, FILE *file)
8723 gcc_assert (x == pc_rtx
8724 || (REGNO (x) != ARG_POINTER_REGNUM
8725 && REGNO (x) != FRAME_POINTER_REGNUM
8726 && REGNO (x) != FLAGS_REG
8727 && REGNO (x) != FPSR_REG
8728 && REGNO (x) != FPCR_REG));
8730 if (ASSEMBLER_DIALECT == ASM_ATT)
8735 gcc_assert (TARGET_64BIT);
8736 fputs ("rip", file);
8740 if (code == 'w' || MMX_REG_P (x))
8742 else if (code == 'b')
8744 else if (code == 'k')
8746 else if (code == 'q')
8748 else if (code == 'y')
8750 else if (code == 'h')
8753 code = GET_MODE_SIZE (GET_MODE (x));
8755 /* Irritatingly, AMD extended registers use different naming convention
8756 from the normal registers. */
8757 if (REX_INT_REG_P (x))
8759 gcc_assert (TARGET_64BIT);
8763 error ("extended registers have no high halves");
8766 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8769 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8772 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8775 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8778 error ("unsupported operand size for extended register");
8786 if (STACK_TOP_P (x))
8788 fputs ("st(0)", file);
8795 if (! ANY_FP_REG_P (x))
8796 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8801 fputs (hi_reg_name[REGNO (x)], file);
8804 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8806 fputs (qi_reg_name[REGNO (x)], file);
8809 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8811 fputs (qi_high_reg_name[REGNO (x)], file);
8818 /* Locate some local-dynamic symbol still in use by this function
8819 so that we can print its name in some tls_local_dynamic_base
8823 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8827 if (GET_CODE (x) == SYMBOL_REF
8828 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8830 cfun->machine->some_ld_name = XSTR (x, 0);
8838 get_some_local_dynamic_name (void)
8842 if (cfun->machine->some_ld_name)
8843 return cfun->machine->some_ld_name;
8845 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8847 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8848 return cfun->machine->some_ld_name;
8854 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8855 C -- print opcode suffix for set/cmov insn.
8856 c -- like C, but print reversed condition
8857 F,f -- likewise, but for floating-point.
8858 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8860 R -- print the prefix for register names.
8861 z -- print the opcode suffix for the size of the current operand.
8862 * -- print a star (in certain assembler syntax)
8863 A -- print an absolute memory reference.
8864 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8865 s -- print a shift double count, followed by the assemblers argument
8867 b -- print the QImode name of the register for the indicated operand.
8868 %b0 would print %al if operands[0] is reg 0.
8869 w -- likewise, print the HImode name of the register.
8870 k -- likewise, print the SImode name of the register.
8871 q -- likewise, print the DImode name of the register.
8872 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8873 y -- print "st(0)" instead of "st" as a register.
8874 D -- print condition for SSE cmp instruction.
8875 P -- if PIC, print an @PLT suffix.
8876 X -- don't print any sort of PIC '@' suffix for a symbol.
8877 & -- print some in-use local-dynamic symbol name.
8878 H -- print a memory address offset by 8; used for sse high-parts
8879 Y -- print condition for SSE5 com* instruction.
8880 + -- print a branch hint as 'cs' or 'ds' prefix
8881 ; -- print a semicolon (after prefixes due to bug in older gas).
8885 print_operand (FILE *file, rtx x, int code)
8892 if (ASSEMBLER_DIALECT == ASM_ATT)
8897 assemble_name (file, get_some_local_dynamic_name ());
8901 switch (ASSEMBLER_DIALECT)
8908 /* Intel syntax. For absolute addresses, registers should not
8909 be surrounded by braces. */
8913 PRINT_OPERAND (file, x, 0);
8923 PRINT_OPERAND (file, x, 0);
8928 if (ASSEMBLER_DIALECT == ASM_ATT)
8933 if (ASSEMBLER_DIALECT == ASM_ATT)
8938 if (ASSEMBLER_DIALECT == ASM_ATT)
8943 if (ASSEMBLER_DIALECT == ASM_ATT)
8948 if (ASSEMBLER_DIALECT == ASM_ATT)
8953 if (ASSEMBLER_DIALECT == ASM_ATT)
8958 /* 387 opcodes don't get size suffixes if the operands are
8960 if (STACK_REG_P (x))
8963 /* Likewise if using Intel opcodes. */
8964 if (ASSEMBLER_DIALECT == ASM_INTEL)
8967 /* This is the size of op from size of operand. */
8968 switch (GET_MODE_SIZE (GET_MODE (x)))
8977 #ifdef HAVE_GAS_FILDS_FISTS
8987 if (GET_MODE (x) == SFmode)
9002 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9004 #ifdef GAS_MNEMONICS
9030 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9032 PRINT_OPERAND (file, x, 0);
9038 /* Little bit of braindamage here. The SSE compare instructions
9039 does use completely different names for the comparisons that the
9040 fp conditional moves. */
9041 switch (GET_CODE (x))
9056 fputs ("unord", file);
9060 fputs ("neq", file);
9064 fputs ("nlt", file);
9068 fputs ("nle", file);
9071 fputs ("ord", file);
9078 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9079 if (ASSEMBLER_DIALECT == ASM_ATT)
9081 switch (GET_MODE (x))
9083 case HImode: putc ('w', file); break;
9085 case SFmode: putc ('l', file); break;
9087 case DFmode: putc ('q', file); break;
9088 default: gcc_unreachable ();
9095 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9098 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9099 if (ASSEMBLER_DIALECT == ASM_ATT)
9102 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9105 /* Like above, but reverse condition */
9107 /* Check to see if argument to %c is really a constant
9108 and not a condition code which needs to be reversed. */
9109 if (!COMPARISON_P (x))
9111 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9114 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9117 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9118 if (ASSEMBLER_DIALECT == ASM_ATT)
9121 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9125 /* It doesn't actually matter what mode we use here, as we're
9126 only going to use this for printing. */
9127 x = adjust_address_nv (x, DImode, 8);
9134 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9137 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9140 int pred_val = INTVAL (XEXP (x, 0));
9142 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9143 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9145 int taken = pred_val > REG_BR_PROB_BASE / 2;
9146 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9148 /* Emit hints only in the case default branch prediction
9149 heuristics would fail. */
9150 if (taken != cputaken)
9152 /* We use 3e (DS) prefix for taken branches and
9153 2e (CS) prefix for not taken branches. */
9155 fputs ("ds ; ", file);
9157 fputs ("cs ; ", file);
9165 switch (GET_CODE (x))
9168 fputs ("neq", file);
9175 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9179 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9190 fputs ("unord", file);
9193 fputs ("ord", file);
9196 fputs ("ueq", file);
9199 fputs ("nlt", file);
9202 fputs ("nle", file);
9205 fputs ("ule", file);
9208 fputs ("ult", file);
9211 fputs ("une", file);
9220 fputs (" ; ", file);
9227 output_operand_lossage ("invalid operand code '%c'", code);
9232 print_reg (x, code, file);
9236 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9237 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9238 && GET_MODE (x) != BLKmode)
9241 switch (GET_MODE_SIZE (GET_MODE (x)))
9243 case 1: size = "BYTE"; break;
9244 case 2: size = "WORD"; break;
9245 case 4: size = "DWORD"; break;
9246 case 8: size = "QWORD"; break;
9247 case 12: size = "XWORD"; break;
9249 if (GET_MODE (x) == XFmode)
9258 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9261 else if (code == 'w')
9263 else if (code == 'k')
9267 fputs (" PTR ", file);
9271 /* Avoid (%rip) for call operands. */
9272 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9273 && !CONST_INT_P (x))
9274 output_addr_const (file, x);
9275 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9276 output_operand_lossage ("invalid constraints for operand");
9281 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9286 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9287 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9289 if (ASSEMBLER_DIALECT == ASM_ATT)
9291 fprintf (file, "0x%08lx", l);
9294 /* These float cases don't actually occur as immediate operands. */
9295 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9299 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9300 fprintf (file, "%s", dstr);
9303 else if (GET_CODE (x) == CONST_DOUBLE
9304 && GET_MODE (x) == XFmode)
9308 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9309 fprintf (file, "%s", dstr);
9314 /* We have patterns that allow zero sets of memory, for instance.
9315 In 64-bit mode, we should probably support all 8-byte vectors,
9316 since we can in fact encode that into an immediate. */
9317 if (GET_CODE (x) == CONST_VECTOR)
9319 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9325 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9327 if (ASSEMBLER_DIALECT == ASM_ATT)
9330 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9331 || GET_CODE (x) == LABEL_REF)
9333 if (ASSEMBLER_DIALECT == ASM_ATT)
9336 fputs ("OFFSET FLAT:", file);
9339 if (CONST_INT_P (x))
9340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9342 output_pic_addr_const (file, x, code);
9344 output_addr_const (file, x);
9348 /* Print a memory operand whose address is ADDR. */
9351 print_operand_address (FILE *file, rtx addr)
9353 struct ix86_address parts;
9354 rtx base, index, disp;
9356 int ok = ix86_decompose_address (addr, &parts);
9361 index = parts.index;
9363 scale = parts.scale;
9371 if (ASSEMBLER_DIALECT == ASM_ATT)
9373 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9379 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9380 if (TARGET_64BIT && !base && !index)
9384 if (GET_CODE (disp) == CONST
9385 && GET_CODE (XEXP (disp, 0)) == PLUS
9386 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9387 symbol = XEXP (XEXP (disp, 0), 0);
9389 if (GET_CODE (symbol) == LABEL_REF
9390 || (GET_CODE (symbol) == SYMBOL_REF
9391 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9394 if (!base && !index)
9396 /* Displacement only requires special attention. */
9398 if (CONST_INT_P (disp))
9400 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9401 fputs ("ds:", file);
9402 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9405 output_pic_addr_const (file, disp, 0);
9407 output_addr_const (file, disp);
9411 if (ASSEMBLER_DIALECT == ASM_ATT)
9416 output_pic_addr_const (file, disp, 0);
9417 else if (GET_CODE (disp) == LABEL_REF)
9418 output_asm_label (disp);
9420 output_addr_const (file, disp);
9425 print_reg (base, 0, file);
9429 print_reg (index, 0, file);
9431 fprintf (file, ",%d", scale);
9437 rtx offset = NULL_RTX;
9441 /* Pull out the offset of a symbol; print any symbol itself. */
9442 if (GET_CODE (disp) == CONST
9443 && GET_CODE (XEXP (disp, 0)) == PLUS
9444 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9446 offset = XEXP (XEXP (disp, 0), 1);
9447 disp = gen_rtx_CONST (VOIDmode,
9448 XEXP (XEXP (disp, 0), 0));
9452 output_pic_addr_const (file, disp, 0);
9453 else if (GET_CODE (disp) == LABEL_REF)
9454 output_asm_label (disp);
9455 else if (CONST_INT_P (disp))
9458 output_addr_const (file, disp);
9464 print_reg (base, 0, file);
9467 if (INTVAL (offset) >= 0)
9469 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9473 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9480 print_reg (index, 0, file);
9482 fprintf (file, "*%d", scale);
9490 output_addr_const_extra (FILE *file, rtx x)
9494 if (GET_CODE (x) != UNSPEC)
9497 op = XVECEXP (x, 0, 0);
9498 switch (XINT (x, 1))
9500 case UNSPEC_GOTTPOFF:
9501 output_addr_const (file, op);
9502 /* FIXME: This might be @TPOFF in Sun ld. */
9503 fputs ("@GOTTPOFF", file);
9506 output_addr_const (file, op);
9507 fputs ("@TPOFF", file);
9510 output_addr_const (file, op);
9512 fputs ("@TPOFF", file);
9514 fputs ("@NTPOFF", file);
9517 output_addr_const (file, op);
9518 fputs ("@DTPOFF", file);
9520 case UNSPEC_GOTNTPOFF:
9521 output_addr_const (file, op);
9523 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9524 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9526 fputs ("@GOTNTPOFF", file);
9528 case UNSPEC_INDNTPOFF:
9529 output_addr_const (file, op);
9530 fputs ("@INDNTPOFF", file);
9540 /* Split one or more DImode RTL references into pairs of SImode
9541 references. The RTL can be REG, offsettable MEM, integer constant, or
9542 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9543 split and "num" is its length. lo_half and hi_half are output arrays
9544 that parallel "operands". */
9547 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9551 rtx op = operands[num];
9553 /* simplify_subreg refuse to split volatile memory addresses,
9554 but we still have to handle it. */
9557 lo_half[num] = adjust_address (op, SImode, 0);
9558 hi_half[num] = adjust_address (op, SImode, 4);
9562 lo_half[num] = simplify_gen_subreg (SImode, op,
9563 GET_MODE (op) == VOIDmode
9564 ? DImode : GET_MODE (op), 0);
9565 hi_half[num] = simplify_gen_subreg (SImode, op,
9566 GET_MODE (op) == VOIDmode
9567 ? DImode : GET_MODE (op), 4);
9571 /* Split one or more TImode RTL references into pairs of DImode
9572 references. The RTL can be REG, offsettable MEM, integer constant, or
9573 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9574 split and "num" is its length. lo_half and hi_half are output arrays
9575 that parallel "operands". */
9578 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9582 rtx op = operands[num];
9584 /* simplify_subreg refuse to split volatile memory addresses, but we
9585 still have to handle it. */
9588 lo_half[num] = adjust_address (op, DImode, 0);
9589 hi_half[num] = adjust_address (op, DImode, 8);
9593 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9594 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9599 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9600 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9601 is the expression of the binary operation. The output may either be
9602 emitted here, or returned to the caller, like all output_* functions.
9604 There is no guarantee that the operands are the same mode, as they
9605 might be within FLOAT or FLOAT_EXTEND expressions. */
9607 #ifndef SYSV386_COMPAT
9608 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9609 wants to fix the assemblers because that causes incompatibility
9610 with gcc. No-one wants to fix gcc because that causes
9611 incompatibility with assemblers... You can use the option of
9612 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9613 #define SYSV386_COMPAT 1
9617 output_387_binary_op (rtx insn, rtx *operands)
9619 static char buf[30];
9622 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9624 #ifdef ENABLE_CHECKING
9625 /* Even if we do not want to check the inputs, this documents input
9626 constraints. Which helps in understanding the following code. */
9627 if (STACK_REG_P (operands[0])
9628 && ((REG_P (operands[1])
9629 && REGNO (operands[0]) == REGNO (operands[1])
9630 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9631 || (REG_P (operands[2])
9632 && REGNO (operands[0]) == REGNO (operands[2])
9633 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9634 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9637 gcc_assert (is_sse);
9640 switch (GET_CODE (operands[3]))
9643 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9644 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9652 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9653 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9661 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9662 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9670 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9671 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9685 if (GET_MODE (operands[0]) == SFmode)
9686 strcat (buf, "ss\t{%2, %0|%0, %2}");
9688 strcat (buf, "sd\t{%2, %0|%0, %2}");
9693 switch (GET_CODE (operands[3]))
9697 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9699 rtx temp = operands[2];
9700 operands[2] = operands[1];
9704 /* know operands[0] == operands[1]. */
9706 if (MEM_P (operands[2]))
9712 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9714 if (STACK_TOP_P (operands[0]))
9715 /* How is it that we are storing to a dead operand[2]?
9716 Well, presumably operands[1] is dead too. We can't
9717 store the result to st(0) as st(0) gets popped on this
9718 instruction. Instead store to operands[2] (which I
9719 think has to be st(1)). st(1) will be popped later.
9720 gcc <= 2.8.1 didn't have this check and generated
9721 assembly code that the Unixware assembler rejected. */
9722 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9724 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9728 if (STACK_TOP_P (operands[0]))
9729 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9731 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9736 if (MEM_P (operands[1]))
9742 if (MEM_P (operands[2]))
9748 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9751 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9752 derived assemblers, confusingly reverse the direction of
9753 the operation for fsub{r} and fdiv{r} when the
9754 destination register is not st(0). The Intel assembler
9755 doesn't have this brain damage. Read !SYSV386_COMPAT to
9756 figure out what the hardware really does. */
9757 if (STACK_TOP_P (operands[0]))
9758 p = "{p\t%0, %2|rp\t%2, %0}";
9760 p = "{rp\t%2, %0|p\t%0, %2}";
9762 if (STACK_TOP_P (operands[0]))
9763 /* As above for fmul/fadd, we can't store to st(0). */
9764 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9766 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9771 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9774 if (STACK_TOP_P (operands[0]))
9775 p = "{rp\t%0, %1|p\t%1, %0}";
9777 p = "{p\t%1, %0|rp\t%0, %1}";
9779 if (STACK_TOP_P (operands[0]))
9780 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9782 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9787 if (STACK_TOP_P (operands[0]))
9789 if (STACK_TOP_P (operands[1]))
9790 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9792 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9795 else if (STACK_TOP_P (operands[1]))
9798 p = "{\t%1, %0|r\t%0, %1}";
9800 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9806 p = "{r\t%2, %0|\t%0, %2}";
9808 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9821 /* Return needed mode for entity in optimize_mode_switching pass. */
9824 ix86_mode_needed (int entity, rtx insn)
9826 enum attr_i387_cw mode;
9828 /* The mode UNINITIALIZED is used to store control word after a
9829 function call or ASM pattern. The mode ANY specify that function
9830 has no requirements on the control word and make no changes in the
9831 bits we are interested in. */
9834 || (NONJUMP_INSN_P (insn)
9835 && (asm_noperands (PATTERN (insn)) >= 0
9836 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9837 return I387_CW_UNINITIALIZED;
9839 if (recog_memoized (insn) < 0)
9842 mode = get_attr_i387_cw (insn);
9847 if (mode == I387_CW_TRUNC)
9852 if (mode == I387_CW_FLOOR)
9857 if (mode == I387_CW_CEIL)
9862 if (mode == I387_CW_MASK_PM)
9873 /* Output code to initialize control word copies used by trunc?f?i and
9874 rounding patterns. CURRENT_MODE is set to current control word,
9875 while NEW_MODE is set to new control word. */
9878 emit_i387_cw_initialization (int mode)
9880 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9883 enum ix86_stack_slot slot;
9885 rtx reg = gen_reg_rtx (HImode);
9887 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9888 emit_move_insn (reg, copy_rtx (stored_mode));
9890 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9895 /* round toward zero (truncate) */
9896 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9897 slot = SLOT_CW_TRUNC;
9901 /* round down toward -oo */
9902 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9903 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9904 slot = SLOT_CW_FLOOR;
9908 /* round up toward +oo */
9909 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9910 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9911 slot = SLOT_CW_CEIL;
9914 case I387_CW_MASK_PM:
9915 /* mask precision exception for nearbyint() */
9916 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9917 slot = SLOT_CW_MASK_PM;
9929 /* round toward zero (truncate) */
9930 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9931 slot = SLOT_CW_TRUNC;
9935 /* round down toward -oo */
9936 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9937 slot = SLOT_CW_FLOOR;
9941 /* round up toward +oo */
9942 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9943 slot = SLOT_CW_CEIL;
9946 case I387_CW_MASK_PM:
9947 /* mask precision exception for nearbyint() */
9948 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9949 slot = SLOT_CW_MASK_PM;
9957 gcc_assert (slot < MAX_386_STACK_LOCALS);
9959 new_mode = assign_386_stack_local (HImode, slot);
9960 emit_move_insn (new_mode, reg);
9963 /* Output code for INSN to convert a float to a signed int. OPERANDS
9964 are the insn operands. The output may be [HSD]Imode and the input
9965 operand may be [SDX]Fmode. */
9968 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9970 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9971 int dimode_p = GET_MODE (operands[0]) == DImode;
9972 int round_mode = get_attr_i387_cw (insn);
9974 /* Jump through a hoop or two for DImode, since the hardware has no
9975 non-popping instruction. We used to do this a different way, but
9976 that was somewhat fragile and broke with post-reload splitters. */
9977 if ((dimode_p || fisttp) && !stack_top_dies)
9978 output_asm_insn ("fld\t%y1", operands);
9980 gcc_assert (STACK_TOP_P (operands[1]));
9981 gcc_assert (MEM_P (operands[0]));
9982 gcc_assert (GET_MODE (operands[1]) != TFmode);
9985 output_asm_insn ("fisttp%z0\t%0", operands);
9988 if (round_mode != I387_CW_ANY)
9989 output_asm_insn ("fldcw\t%3", operands);
9990 if (stack_top_dies || dimode_p)
9991 output_asm_insn ("fistp%z0\t%0", operands);
9993 output_asm_insn ("fist%z0\t%0", operands);
9994 if (round_mode != I387_CW_ANY)
9995 output_asm_insn ("fldcw\t%2", operands);
10001 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10002 have the values zero or one, indicates the ffreep insn's operand
10003 from the OPERANDS array. */
10005 static const char *
10006 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10008 if (TARGET_USE_FFREEP)
10009 #if HAVE_AS_IX86_FFREEP
10010 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10013 static char retval[] = ".word\t0xc_df";
10014 int regno = REGNO (operands[opno]);
10016 gcc_assert (FP_REGNO_P (regno));
10018 retval[9] = '0' + (regno - FIRST_STACK_REG);
10023 return opno ? "fstp\t%y1" : "fstp\t%y0";
10027 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10028 should be used. UNORDERED_P is true when fucom should be used. */
10031 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10033 int stack_top_dies;
10034 rtx cmp_op0, cmp_op1;
10035 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
10039 cmp_op0 = operands[0];
10040 cmp_op1 = operands[1];
10044 cmp_op0 = operands[1];
10045 cmp_op1 = operands[2];
10050 if (GET_MODE (operands[0]) == SFmode)
10052 return "ucomiss\t{%1, %0|%0, %1}";
10054 return "comiss\t{%1, %0|%0, %1}";
10057 return "ucomisd\t{%1, %0|%0, %1}";
10059 return "comisd\t{%1, %0|%0, %1}";
10062 gcc_assert (STACK_TOP_P (cmp_op0));
10064 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10066 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10068 if (stack_top_dies)
10070 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10071 return output_387_ffreep (operands, 1);
10074 return "ftst\n\tfnstsw\t%0";
10077 if (STACK_REG_P (cmp_op1)
10079 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10080 && REGNO (cmp_op1) != FIRST_STACK_REG)
10082 /* If both the top of the 387 stack dies, and the other operand
10083 is also a stack register that dies, then this must be a
10084 `fcompp' float compare */
10088 /* There is no double popping fcomi variant. Fortunately,
10089 eflags is immune from the fstp's cc clobbering. */
10091 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10093 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10094 return output_387_ffreep (operands, 0);
10099 return "fucompp\n\tfnstsw\t%0";
10101 return "fcompp\n\tfnstsw\t%0";
10106 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10108 static const char * const alt[16] =
10110 "fcom%z2\t%y2\n\tfnstsw\t%0",
10111 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10112 "fucom%z2\t%y2\n\tfnstsw\t%0",
10113 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10115 "ficom%z2\t%y2\n\tfnstsw\t%0",
10116 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10120 "fcomi\t{%y1, %0|%0, %y1}",
10121 "fcomip\t{%y1, %0|%0, %y1}",
10122 "fucomi\t{%y1, %0|%0, %y1}",
10123 "fucomip\t{%y1, %0|%0, %y1}",
10134 mask = eflags_p << 3;
10135 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10136 mask |= unordered_p << 1;
10137 mask |= stack_top_dies;
10139 gcc_assert (mask < 16);
10148 ix86_output_addr_vec_elt (FILE *file, int value)
10150 const char *directive = ASM_LONG;
10154 directive = ASM_QUAD;
10156 gcc_assert (!TARGET_64BIT);
10159 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
10163 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10165 const char *directive = ASM_LONG;
10168 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10169 directive = ASM_QUAD;
10171 gcc_assert (!TARGET_64BIT);
10173 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10174 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10175 fprintf (file, "%s%s%d-%s%d\n",
10176 directive, LPREFIX, value, LPREFIX, rel);
10177 else if (HAVE_AS_GOTOFF_IN_DATA)
10178 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10180 else if (TARGET_MACHO)
10182 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10183 machopic_output_function_base_name (file);
10184 fprintf(file, "\n");
10188 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10189 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10192 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10196 ix86_expand_clear (rtx dest)
10200 /* We play register width games, which are only valid after reload. */
10201 gcc_assert (reload_completed);
10203 /* Avoid HImode and its attendant prefix byte. */
10204 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10205 dest = gen_rtx_REG (SImode, REGNO (dest));
10206 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10208 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10209 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10211 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10212 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10218 /* X is an unchanging MEM. If it is a constant pool reference, return
10219 the constant pool rtx, else NULL. */
10222 maybe_get_pool_constant (rtx x)
10224 x = ix86_delegitimize_address (XEXP (x, 0));
10226 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10227 return get_pool_constant (x);
10233 ix86_expand_move (enum machine_mode mode, rtx operands[])
10236 enum tls_model model;
10241 if (GET_CODE (op1) == SYMBOL_REF)
10243 model = SYMBOL_REF_TLS_MODEL (op1);
10246 op1 = legitimize_tls_address (op1, model, true);
10247 op1 = force_operand (op1, op0);
10251 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10252 && SYMBOL_REF_DLLIMPORT_P (op1))
10253 op1 = legitimize_dllimport_symbol (op1, false);
10255 else if (GET_CODE (op1) == CONST
10256 && GET_CODE (XEXP (op1, 0)) == PLUS
10257 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10259 rtx addend = XEXP (XEXP (op1, 0), 1);
10260 rtx symbol = XEXP (XEXP (op1, 0), 0);
10263 model = SYMBOL_REF_TLS_MODEL (symbol);
10265 tmp = legitimize_tls_address (symbol, model, true);
10266 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10267 && SYMBOL_REF_DLLIMPORT_P (symbol))
10268 tmp = legitimize_dllimport_symbol (symbol, true);
10272 tmp = force_operand (tmp, NULL);
10273 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10274 op0, 1, OPTAB_DIRECT);
10280 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10282 if (TARGET_MACHO && !TARGET_64BIT)
10287 rtx temp = ((reload_in_progress
10288 || ((op0 && REG_P (op0))
10290 ? op0 : gen_reg_rtx (Pmode));
10291 op1 = machopic_indirect_data_reference (op1, temp);
10292 op1 = machopic_legitimize_pic_address (op1, mode,
10293 temp == op1 ? 0 : temp);
10295 else if (MACHOPIC_INDIRECT)
10296 op1 = machopic_indirect_data_reference (op1, 0);
10304 op1 = force_reg (Pmode, op1);
10305 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10307 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10308 op1 = legitimize_pic_address (op1, reg);
10317 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10318 || !push_operand (op0, mode))
10320 op1 = force_reg (mode, op1);
10322 if (push_operand (op0, mode)
10323 && ! general_no_elim_operand (op1, mode))
10324 op1 = copy_to_mode_reg (mode, op1);
10326 /* Force large constants in 64bit compilation into register
10327 to get them CSEed. */
10328 if (can_create_pseudo_p ()
10329 && (mode == DImode) && TARGET_64BIT
10330 && immediate_operand (op1, mode)
10331 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10332 && !register_operand (op0, mode)
10334 op1 = copy_to_mode_reg (mode, op1);
10336 if (can_create_pseudo_p ()
10337 && FLOAT_MODE_P (mode)
10338 && GET_CODE (op1) == CONST_DOUBLE)
10340 /* If we are loading a floating point constant to a register,
10341 force the value to memory now, since we'll get better code
10342 out the back end. */
10344 op1 = validize_mem (force_const_mem (mode, op1));
10345 if (!register_operand (op0, mode))
10347 rtx temp = gen_reg_rtx (mode);
10348 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10349 emit_move_insn (op0, temp);
10355 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10359 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10361 rtx op0 = operands[0], op1 = operands[1];
10362 unsigned int align = GET_MODE_ALIGNMENT (mode);
10364 /* Force constants other than zero into memory. We do not know how
10365 the instructions used to build constants modify the upper 64 bits
10366 of the register, once we have that information we may be able
10367 to handle some of them more efficiently. */
10368 if (can_create_pseudo_p ()
10369 && register_operand (op0, mode)
10370 && (CONSTANT_P (op1)
10371 || (GET_CODE (op1) == SUBREG
10372 && CONSTANT_P (SUBREG_REG (op1))))
10373 && standard_sse_constant_p (op1) <= 0)
10374 op1 = validize_mem (force_const_mem (mode, op1));
10376 /* TDmode values are passed as TImode on the stack. TImode values
10377 are moved via xmm registers, and moving them to stack can result in
10378 unaligned memory access. Use ix86_expand_vector_move_misalign()
10379 if memory operand is not aligned correctly. */
10380 if (can_create_pseudo_p ()
10381 && (mode == TImode) && !TARGET_64BIT
10382 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10383 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10387 /* ix86_expand_vector_move_misalign() does not like constants ... */
10388 if (CONSTANT_P (op1)
10389 || (GET_CODE (op1) == SUBREG
10390 && CONSTANT_P (SUBREG_REG (op1))))
10391 op1 = validize_mem (force_const_mem (mode, op1));
10393 /* ... nor both arguments in memory. */
10394 if (!register_operand (op0, mode)
10395 && !register_operand (op1, mode))
10396 op1 = force_reg (mode, op1);
10398 tmp[0] = op0; tmp[1] = op1;
10399 ix86_expand_vector_move_misalign (mode, tmp);
10403 /* Make operand1 a register if it isn't already. */
10404 if (can_create_pseudo_p ()
10405 && !register_operand (op0, mode)
10406 && !register_operand (op1, mode))
10408 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10412 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10415 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10416 straight to ix86_expand_vector_move. */
10417 /* Code generation for scalar reg-reg moves of single and double precision data:
10418 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10422 if (x86_sse_partial_reg_dependency == true)
10427 Code generation for scalar loads of double precision data:
10428 if (x86_sse_split_regs == true)
10429 movlpd mem, reg (gas syntax)
10433 Code generation for unaligned packed loads of single precision data
10434 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10435 if (x86_sse_unaligned_move_optimal)
10438 if (x86_sse_partial_reg_dependency == true)
10450 Code generation for unaligned packed loads of double precision data
10451 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10452 if (x86_sse_unaligned_move_optimal)
10455 if (x86_sse_split_regs == true)
10468 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10477 /* If we're optimizing for size, movups is the smallest. */
10480 op0 = gen_lowpart (V4SFmode, op0);
10481 op1 = gen_lowpart (V4SFmode, op1);
10482 emit_insn (gen_sse_movups (op0, op1));
10486 /* ??? If we have typed data, then it would appear that using
10487 movdqu is the only way to get unaligned data loaded with
10489 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10491 op0 = gen_lowpart (V16QImode, op0);
10492 op1 = gen_lowpart (V16QImode, op1);
10493 emit_insn (gen_sse2_movdqu (op0, op1));
10497 if (TARGET_SSE2 && mode == V2DFmode)
10501 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10503 op0 = gen_lowpart (V2DFmode, op0);
10504 op1 = gen_lowpart (V2DFmode, op1);
10505 emit_insn (gen_sse2_movupd (op0, op1));
10509 /* When SSE registers are split into halves, we can avoid
10510 writing to the top half twice. */
10511 if (TARGET_SSE_SPLIT_REGS)
10513 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10518 /* ??? Not sure about the best option for the Intel chips.
10519 The following would seem to satisfy; the register is
10520 entirely cleared, breaking the dependency chain. We
10521 then store to the upper half, with a dependency depth
10522 of one. A rumor has it that Intel recommends two movsd
10523 followed by an unpacklpd, but this is unconfirmed. And
10524 given that the dependency depth of the unpacklpd would
10525 still be one, I'm not sure why this would be better. */
10526 zero = CONST0_RTX (V2DFmode);
10529 m = adjust_address (op1, DFmode, 0);
10530 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10531 m = adjust_address (op1, DFmode, 8);
10532 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10536 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10538 op0 = gen_lowpart (V4SFmode, op0);
10539 op1 = gen_lowpart (V4SFmode, op1);
10540 emit_insn (gen_sse_movups (op0, op1));
10544 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10545 emit_move_insn (op0, CONST0_RTX (mode));
10547 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10549 if (mode != V4SFmode)
10550 op0 = gen_lowpart (V4SFmode, op0);
10551 m = adjust_address (op1, V2SFmode, 0);
10552 emit_insn (gen_sse_loadlps (op0, op0, m));
10553 m = adjust_address (op1, V2SFmode, 8);
10554 emit_insn (gen_sse_loadhps (op0, op0, m));
10557 else if (MEM_P (op0))
10559 /* If we're optimizing for size, movups is the smallest. */
10562 op0 = gen_lowpart (V4SFmode, op0);
10563 op1 = gen_lowpart (V4SFmode, op1);
10564 emit_insn (gen_sse_movups (op0, op1));
10568 /* ??? Similar to above, only less clear because of quote
10569 typeless stores unquote. */
10570 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10571 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10573 op0 = gen_lowpart (V16QImode, op0);
10574 op1 = gen_lowpart (V16QImode, op1);
10575 emit_insn (gen_sse2_movdqu (op0, op1));
10579 if (TARGET_SSE2 && mode == V2DFmode)
10581 m = adjust_address (op0, DFmode, 0);
10582 emit_insn (gen_sse2_storelpd (m, op1));
10583 m = adjust_address (op0, DFmode, 8);
10584 emit_insn (gen_sse2_storehpd (m, op1));
10588 if (mode != V4SFmode)
10589 op1 = gen_lowpart (V4SFmode, op1);
10590 m = adjust_address (op0, V2SFmode, 0);
10591 emit_insn (gen_sse_storelps (m, op1));
10592 m = adjust_address (op0, V2SFmode, 8);
10593 emit_insn (gen_sse_storehps (m, op1));
10597 gcc_unreachable ();
10600 /* Expand a push in MODE. This is some mode for which we do not support
10601 proper push instructions, at least from the registers that we expect
10602 the value to live in. */
10605 ix86_expand_push (enum machine_mode mode, rtx x)
10609 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10610 GEN_INT (-GET_MODE_SIZE (mode)),
10611 stack_pointer_rtx, 1, OPTAB_DIRECT);
10612 if (tmp != stack_pointer_rtx)
10613 emit_move_insn (stack_pointer_rtx, tmp);
10615 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10616 emit_move_insn (tmp, x);
10619 /* Helper function of ix86_fixup_binary_operands to canonicalize
10620 operand order. Returns true if the operands should be swapped. */
10623 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10626 rtx dst = operands[0];
10627 rtx src1 = operands[1];
10628 rtx src2 = operands[2];
10630 /* If the operation is not commutative, we can't do anything. */
10631 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10634 /* Highest priority is that src1 should match dst. */
10635 if (rtx_equal_p (dst, src1))
10637 if (rtx_equal_p (dst, src2))
10640 /* Next highest priority is that immediate constants come second. */
10641 if (immediate_operand (src2, mode))
10643 if (immediate_operand (src1, mode))
10646 /* Lowest priority is that memory references should come second. */
10656 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10657 destination to use for the operation. If different from the true
10658 destination in operands[0], a copy operation will be required. */
10661 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10664 rtx dst = operands[0];
10665 rtx src1 = operands[1];
10666 rtx src2 = operands[2];
10668 /* Canonicalize operand order. */
10669 if (ix86_swap_binary_operands_p (code, mode, operands))
10676 /* Both source operands cannot be in memory. */
10677 if (MEM_P (src1) && MEM_P (src2))
10679 /* Optimization: Only read from memory once. */
10680 if (rtx_equal_p (src1, src2))
10682 src2 = force_reg (mode, src2);
10686 src2 = force_reg (mode, src2);
10689 /* If the destination is memory, and we do not have matching source
10690 operands, do things in registers. */
10691 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10692 dst = gen_reg_rtx (mode);
10694 /* Source 1 cannot be a constant. */
10695 if (CONSTANT_P (src1))
10696 src1 = force_reg (mode, src1);
10698 /* Source 1 cannot be a non-matching memory. */
10699 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10700 src1 = force_reg (mode, src1);
10702 operands[1] = src1;
10703 operands[2] = src2;
10707 /* Similarly, but assume that the destination has already been
10708 set up properly. */
10711 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10712 enum machine_mode mode, rtx operands[])
10714 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10715 gcc_assert (dst == operands[0]);
10718 /* Attempt to expand a binary operator. Make the expansion closer to the
10719 actual machine, then just general_operand, which will allow 3 separate
10720 memory references (one output, two input) in a single insn. */
10723 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10726 rtx src1, src2, dst, op, clob;
10728 dst = ix86_fixup_binary_operands (code, mode, operands);
10729 src1 = operands[1];
10730 src2 = operands[2];
10732 /* Emit the instruction. */
10734 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10735 if (reload_in_progress)
10737 /* Reload doesn't know about the flags register, and doesn't know that
10738 it doesn't want to clobber it. We can only do this with PLUS. */
10739 gcc_assert (code == PLUS);
10744 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10745 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10748 /* Fix up the destination if needed. */
10749 if (dst != operands[0])
10750 emit_move_insn (operands[0], dst);
10753 /* Return TRUE or FALSE depending on whether the binary operator meets the
10754 appropriate constraints. */
10757 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10760 rtx dst = operands[0];
10761 rtx src1 = operands[1];
10762 rtx src2 = operands[2];
10764 /* Both source operands cannot be in memory. */
10765 if (MEM_P (src1) && MEM_P (src2))
10768 /* Canonicalize operand order for commutative operators. */
10769 if (ix86_swap_binary_operands_p (code, mode, operands))
10776 /* If the destination is memory, we must have a matching source operand. */
10777 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10780 /* Source 1 cannot be a constant. */
10781 if (CONSTANT_P (src1))
10784 /* Source 1 cannot be a non-matching memory. */
10785 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10791 /* Attempt to expand a unary operator. Make the expansion closer to the
10792 actual machine, then just general_operand, which will allow 2 separate
10793 memory references (one output, one input) in a single insn. */
10796 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10799 int matching_memory;
10800 rtx src, dst, op, clob;
10805 /* If the destination is memory, and we do not have matching source
10806 operands, do things in registers. */
10807 matching_memory = 0;
10810 if (rtx_equal_p (dst, src))
10811 matching_memory = 1;
10813 dst = gen_reg_rtx (mode);
10816 /* When source operand is memory, destination must match. */
10817 if (MEM_P (src) && !matching_memory)
10818 src = force_reg (mode, src);
10820 /* Emit the instruction. */
10822 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10823 if (reload_in_progress || code == NOT)
10825 /* Reload doesn't know about the flags register, and doesn't know that
10826 it doesn't want to clobber it. */
10827 gcc_assert (code == NOT);
10832 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10833 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10836 /* Fix up the destination if needed. */
10837 if (dst != operands[0])
10838 emit_move_insn (operands[0], dst);
10841 /* Return TRUE or FALSE depending on whether the unary operator meets the
10842 appropriate constraints. */
10845 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10846 enum machine_mode mode ATTRIBUTE_UNUSED,
10847 rtx operands[2] ATTRIBUTE_UNUSED)
10849 /* If one of operands is memory, source and destination must match. */
10850 if ((MEM_P (operands[0])
10851 || MEM_P (operands[1]))
10852 && ! rtx_equal_p (operands[0], operands[1]))
10857 /* Post-reload splitter for converting an SF or DFmode value in an
10858 SSE register into an unsigned SImode. */
10861 ix86_split_convert_uns_si_sse (rtx operands[])
10863 enum machine_mode vecmode;
10864 rtx value, large, zero_or_two31, input, two31, x;
10866 large = operands[1];
10867 zero_or_two31 = operands[2];
10868 input = operands[3];
10869 two31 = operands[4];
10870 vecmode = GET_MODE (large);
10871 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10873 /* Load up the value into the low element. We must ensure that the other
10874 elements are valid floats -- zero is the easiest such value. */
10877 if (vecmode == V4SFmode)
10878 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10880 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10884 input = gen_rtx_REG (vecmode, REGNO (input));
10885 emit_move_insn (value, CONST0_RTX (vecmode));
10886 if (vecmode == V4SFmode)
10887 emit_insn (gen_sse_movss (value, value, input));
10889 emit_insn (gen_sse2_movsd (value, value, input));
10892 emit_move_insn (large, two31);
10893 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10895 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10896 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10898 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10899 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10901 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10902 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10904 large = gen_rtx_REG (V4SImode, REGNO (large));
10905 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10907 x = gen_rtx_REG (V4SImode, REGNO (value));
10908 if (vecmode == V4SFmode)
10909 emit_insn (gen_sse2_cvttps2dq (x, value));
10911 emit_insn (gen_sse2_cvttpd2dq (x, value));
10914 emit_insn (gen_xorv4si3 (value, value, large));
10917 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10918 Expects the 64-bit DImode to be supplied in a pair of integral
10919 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10920 -mfpmath=sse, !optimize_size only. */
10923 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10925 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10926 rtx int_xmm, fp_xmm;
10927 rtx biases, exponents;
10930 int_xmm = gen_reg_rtx (V4SImode);
10931 if (TARGET_INTER_UNIT_MOVES)
10932 emit_insn (gen_movdi_to_sse (int_xmm, input));
10933 else if (TARGET_SSE_SPLIT_REGS)
10935 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10936 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10940 x = gen_reg_rtx (V2DImode);
10941 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10942 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10945 x = gen_rtx_CONST_VECTOR (V4SImode,
10946 gen_rtvec (4, GEN_INT (0x43300000UL),
10947 GEN_INT (0x45300000UL),
10948 const0_rtx, const0_rtx));
10949 exponents = validize_mem (force_const_mem (V4SImode, x));
10951 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10952 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10954 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10955 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10956 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10957 (0x1.0p84 + double(fp_value_hi_xmm)).
10958 Note these exponents differ by 32. */
10960 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10962 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10963 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10964 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10965 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10966 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10967 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10968 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10969 biases = validize_mem (force_const_mem (V2DFmode, biases));
10970 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10972 /* Add the upper and lower DFmode values together. */
10974 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10977 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10978 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10979 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10982 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10985 /* Not used, but eases macroization of patterns. */
10987 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
10988 rtx input ATTRIBUTE_UNUSED)
10990 gcc_unreachable ();
10993 /* Convert an unsigned SImode value into a DFmode. Only currently used
10994 for SSE, but applicable anywhere. */
10997 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10999 REAL_VALUE_TYPE TWO31r;
11002 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11003 NULL, 1, OPTAB_DIRECT);
11005 fp = gen_reg_rtx (DFmode);
11006 emit_insn (gen_floatsidf2 (fp, x));
11008 real_ldexp (&TWO31r, &dconst1, 31);
11009 x = const_double_from_real_value (TWO31r, DFmode);
11011 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
11013 emit_move_insn (target, x);
11016 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11017 32-bit mode; otherwise we have a direct convert instruction. */
11020 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11022 REAL_VALUE_TYPE TWO32r;
11023 rtx fp_lo, fp_hi, x;
11025 fp_lo = gen_reg_rtx (DFmode);
11026 fp_hi = gen_reg_rtx (DFmode);
11028 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11030 real_ldexp (&TWO32r, &dconst1, 32);
11031 x = const_double_from_real_value (TWO32r, DFmode);
11032 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11034 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11036 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
11039 emit_move_insn (target, x);
11042 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11043 For x86_32, -mfpmath=sse, !optimize_size only. */
11045 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11047 REAL_VALUE_TYPE ONE16r;
11048 rtx fp_hi, fp_lo, int_hi, int_lo, x;
11050 real_ldexp (&ONE16r, &dconst1, 16);
11051 x = const_double_from_real_value (ONE16r, SFmode);
11052 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11053 NULL, 0, OPTAB_DIRECT);
11054 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11055 NULL, 0, OPTAB_DIRECT);
11056 fp_hi = gen_reg_rtx (SFmode);
11057 fp_lo = gen_reg_rtx (SFmode);
11058 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11059 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11060 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11062 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11064 if (!rtx_equal_p (target, fp_hi))
11065 emit_move_insn (target, fp_hi);
11068 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11069 then replicate the value for all elements of the vector
11073 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
11080 v = gen_rtvec (4, value, value, value, value);
11081 return gen_rtx_CONST_VECTOR (V4SImode, v);
11085 v = gen_rtvec (2, value, value);
11086 return gen_rtx_CONST_VECTOR (V2DImode, v);
11090 v = gen_rtvec (4, value, value, value, value);
11092 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11093 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11094 return gen_rtx_CONST_VECTOR (V4SFmode, v);
11098 v = gen_rtvec (2, value, value);
11100 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11101 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11104 gcc_unreachable ();
11108 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11109 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11110 for an SSE register. If VECT is true, then replicate the mask for
11111 all elements of the vector register. If INVERT is true, then create
11112 a mask excluding the sign bit. */
11115 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11117 enum machine_mode vec_mode, imode;
11118 HOST_WIDE_INT hi, lo;
11123 /* Find the sign bit, sign extended to 2*HWI. */
11129 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11130 lo = 0x80000000, hi = lo < 0;
11136 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11137 if (HOST_BITS_PER_WIDE_INT >= 64)
11138 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11140 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11146 vec_mode = VOIDmode;
11147 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11148 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11152 gcc_unreachable ();
11156 lo = ~lo, hi = ~hi;
11158 /* Force this value into the low part of a fp vector constant. */
11159 mask = immed_double_const (lo, hi, imode);
11160 mask = gen_lowpart (mode, mask);
11162 if (vec_mode == VOIDmode)
11163 return force_reg (mode, mask);
11165 v = ix86_build_const_vector (mode, vect, mask);
11166 return force_reg (vec_mode, v);
11169 /* Generate code for floating point ABS or NEG. */
11172 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11175 rtx mask, set, use, clob, dst, src;
11176 bool use_sse = false;
11177 bool vector_mode = VECTOR_MODE_P (mode);
11178 enum machine_mode elt_mode = mode;
11182 elt_mode = GET_MODE_INNER (mode);
11185 else if (mode == TFmode)
11187 else if (TARGET_SSE_MATH)
11188 use_sse = SSE_FLOAT_MODE_P (mode);
11190 /* NEG and ABS performed with SSE use bitwise mask operations.
11191 Create the appropriate mask now. */
11193 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11202 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11203 set = gen_rtx_SET (VOIDmode, dst, set);
11208 set = gen_rtx_fmt_e (code, mode, src);
11209 set = gen_rtx_SET (VOIDmode, dst, set);
11212 use = gen_rtx_USE (VOIDmode, mask);
11213 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11214 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11215 gen_rtvec (3, set, use, clob)));
11222 /* Expand a copysign operation. Special case operand 0 being a constant. */
11225 ix86_expand_copysign (rtx operands[])
11227 enum machine_mode mode, vmode;
11228 rtx dest, op0, op1, mask, nmask;
11230 dest = operands[0];
11234 mode = GET_MODE (dest);
11235 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11237 if (GET_CODE (op0) == CONST_DOUBLE)
11239 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11241 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11242 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11244 if (mode == SFmode || mode == DFmode)
11246 if (op0 == CONST0_RTX (mode))
11247 op0 = CONST0_RTX (vmode);
11252 if (mode == SFmode)
11253 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11254 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11256 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11257 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11261 mask = ix86_build_signbit_mask (mode, 0, 0);
11263 if (mode == SFmode)
11264 copysign_insn = gen_copysignsf3_const;
11265 else if (mode == DFmode)
11266 copysign_insn = gen_copysigndf3_const;
11268 copysign_insn = gen_copysigntf3_const;
11270 emit_insn (copysign_insn (dest, op0, op1, mask));
11274 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11276 nmask = ix86_build_signbit_mask (mode, 0, 1);
11277 mask = ix86_build_signbit_mask (mode, 0, 0);
11279 if (mode == SFmode)
11280 copysign_insn = gen_copysignsf3_var;
11281 else if (mode == DFmode)
11282 copysign_insn = gen_copysigndf3_var;
11284 copysign_insn = gen_copysigntf3_var;
11286 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11290 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11291 be a constant, and so has already been expanded into a vector constant. */
11294 ix86_split_copysign_const (rtx operands[])
11296 enum machine_mode mode, vmode;
11297 rtx dest, op0, op1, mask, x;
11299 dest = operands[0];
11302 mask = operands[3];
11304 mode = GET_MODE (dest);
11305 vmode = GET_MODE (mask);
11307 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11308 x = gen_rtx_AND (vmode, dest, mask);
11309 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11311 if (op0 != CONST0_RTX (vmode))
11313 x = gen_rtx_IOR (vmode, dest, op0);
11314 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11318 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11319 so we have to do two masks. */
11322 ix86_split_copysign_var (rtx operands[])
11324 enum machine_mode mode, vmode;
11325 rtx dest, scratch, op0, op1, mask, nmask, x;
11327 dest = operands[0];
11328 scratch = operands[1];
11331 nmask = operands[4];
11332 mask = operands[5];
11334 mode = GET_MODE (dest);
11335 vmode = GET_MODE (mask);
11337 if (rtx_equal_p (op0, op1))
11339 /* Shouldn't happen often (it's useless, obviously), but when it does
11340 we'd generate incorrect code if we continue below. */
11341 emit_move_insn (dest, op0);
11345 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11347 gcc_assert (REGNO (op1) == REGNO (scratch));
11349 x = gen_rtx_AND (vmode, scratch, mask);
11350 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11353 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11354 x = gen_rtx_NOT (vmode, dest);
11355 x = gen_rtx_AND (vmode, x, op0);
11356 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11360 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11362 x = gen_rtx_AND (vmode, scratch, mask);
11364 else /* alternative 2,4 */
11366 gcc_assert (REGNO (mask) == REGNO (scratch));
11367 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11368 x = gen_rtx_AND (vmode, scratch, op1);
11370 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11372 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11374 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11375 x = gen_rtx_AND (vmode, dest, nmask);
11377 else /* alternative 3,4 */
11379 gcc_assert (REGNO (nmask) == REGNO (dest));
11381 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11382 x = gen_rtx_AND (vmode, dest, op0);
11384 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11387 x = gen_rtx_IOR (vmode, dest, scratch);
11388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11391 /* Return TRUE or FALSE depending on whether the first SET in INSN
11392 has source and destination with matching CC modes, and that the
11393 CC mode is at least as constrained as REQ_MODE. */
11396 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11399 enum machine_mode set_mode;
11401 set = PATTERN (insn);
11402 if (GET_CODE (set) == PARALLEL)
11403 set = XVECEXP (set, 0, 0);
11404 gcc_assert (GET_CODE (set) == SET);
11405 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11407 set_mode = GET_MODE (SET_DEST (set));
11411 if (req_mode != CCNOmode
11412 && (req_mode != CCmode
11413 || XEXP (SET_SRC (set), 1) != const0_rtx))
11417 if (req_mode == CCGCmode)
11421 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11425 if (req_mode == CCZmode)
11432 gcc_unreachable ();
11435 return (GET_MODE (SET_SRC (set)) == set_mode);
11438 /* Generate insn patterns to do an integer compare of OPERANDS. */
11441 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11443 enum machine_mode cmpmode;
11446 cmpmode = SELECT_CC_MODE (code, op0, op1);
11447 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11449 /* This is very simple, but making the interface the same as in the
11450 FP case makes the rest of the code easier. */
11451 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11452 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11454 /* Return the test that should be put into the flags user, i.e.
11455 the bcc, scc, or cmov instruction. */
11456 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11459 /* Figure out whether to use ordered or unordered fp comparisons.
11460 Return the appropriate mode to use. */
11463 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11465 /* ??? In order to make all comparisons reversible, we do all comparisons
11466 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11467 all forms trapping and nontrapping comparisons, we can make inequality
11468 comparisons trapping again, since it results in better code when using
11469 FCOM based compares. */
11470 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11474 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11476 enum machine_mode mode = GET_MODE (op0);
11478 if (SCALAR_FLOAT_MODE_P (mode))
11480 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11481 return ix86_fp_compare_mode (code);
11486 /* Only zero flag is needed. */
11487 case EQ: /* ZF=0 */
11488 case NE: /* ZF!=0 */
11490 /* Codes needing carry flag. */
11491 case GEU: /* CF=0 */
11492 case LTU: /* CF=1 */
11493 /* Detect overflow checks. They need just the carry flag. */
11494 if (GET_CODE (op0) == PLUS
11495 && rtx_equal_p (op1, XEXP (op0, 0)))
11499 case GTU: /* CF=0 & ZF=0 */
11500 case LEU: /* CF=1 | ZF=1 */
11501 /* Detect overflow checks. They need just the carry flag. */
11502 if (GET_CODE (op0) == MINUS
11503 && rtx_equal_p (op1, XEXP (op0, 0)))
11507 /* Codes possibly doable only with sign flag when
11508 comparing against zero. */
11509 case GE: /* SF=OF or SF=0 */
11510 case LT: /* SF<>OF or SF=1 */
11511 if (op1 == const0_rtx)
11514 /* For other cases Carry flag is not required. */
11516 /* Codes doable only with sign flag when comparing
11517 against zero, but we miss jump instruction for it
11518 so we need to use relational tests against overflow
11519 that thus needs to be zero. */
11520 case GT: /* ZF=0 & SF=OF */
11521 case LE: /* ZF=1 | SF<>OF */
11522 if (op1 == const0_rtx)
11526 /* strcmp pattern do (use flags) and combine may ask us for proper
11531 gcc_unreachable ();
11535 /* Return the fixed registers used for condition codes. */
11538 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11545 /* If two condition code modes are compatible, return a condition code
11546 mode which is compatible with both. Otherwise, return
11549 static enum machine_mode
11550 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11555 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11558 if ((m1 == CCGCmode && m2 == CCGOCmode)
11559 || (m1 == CCGOCmode && m2 == CCGCmode))
11565 gcc_unreachable ();
11595 /* These are only compatible with themselves, which we already
11601 /* Split comparison code CODE into comparisons we can do using branch
11602 instructions. BYPASS_CODE is comparison code for branch that will
11603 branch around FIRST_CODE and SECOND_CODE. If some of branches
11604 is not required, set value to UNKNOWN.
11605 We never require more than two branches. */
11608 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11609 enum rtx_code *first_code,
11610 enum rtx_code *second_code)
11612 *first_code = code;
11613 *bypass_code = UNKNOWN;
11614 *second_code = UNKNOWN;
11616 /* The fcomi comparison sets flags as follows:
11626 case GT: /* GTU - CF=0 & ZF=0 */
11627 case GE: /* GEU - CF=0 */
11628 case ORDERED: /* PF=0 */
11629 case UNORDERED: /* PF=1 */
11630 case UNEQ: /* EQ - ZF=1 */
11631 case UNLT: /* LTU - CF=1 */
11632 case UNLE: /* LEU - CF=1 | ZF=1 */
11633 case LTGT: /* EQ - ZF=0 */
11635 case LT: /* LTU - CF=1 - fails on unordered */
11636 *first_code = UNLT;
11637 *bypass_code = UNORDERED;
11639 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11640 *first_code = UNLE;
11641 *bypass_code = UNORDERED;
11643 case EQ: /* EQ - ZF=1 - fails on unordered */
11644 *first_code = UNEQ;
11645 *bypass_code = UNORDERED;
11647 case NE: /* NE - ZF=0 - fails on unordered */
11648 *first_code = LTGT;
11649 *second_code = UNORDERED;
11651 case UNGE: /* GEU - CF=0 - fails on unordered */
11653 *second_code = UNORDERED;
11655 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11657 *second_code = UNORDERED;
11660 gcc_unreachable ();
11662 if (!TARGET_IEEE_FP)
11664 *second_code = UNKNOWN;
11665 *bypass_code = UNKNOWN;
11669 /* Return cost of comparison done fcom + arithmetics operations on AX.
11670 All following functions do use number of instructions as a cost metrics.
11671 In future this should be tweaked to compute bytes for optimize_size and
11672 take into account performance of various instructions on various CPUs. */
11674 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11676 if (!TARGET_IEEE_FP)
11678 /* The cost of code output by ix86_expand_fp_compare. */
11702 gcc_unreachable ();
11706 /* Return cost of comparison done using fcomi operation.
11707 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11709 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11711 enum rtx_code bypass_code, first_code, second_code;
11712 /* Return arbitrarily high cost when instruction is not supported - this
11713 prevents gcc from using it. */
11716 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11717 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11720 /* Return cost of comparison done using sahf operation.
11721 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11723 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11725 enum rtx_code bypass_code, first_code, second_code;
11726 /* Return arbitrarily high cost when instruction is not preferred - this
11727 avoids gcc from using it. */
11728 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11730 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11731 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11734 /* Compute cost of the comparison done using any method.
11735 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11737 ix86_fp_comparison_cost (enum rtx_code code)
11739 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11742 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11743 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11745 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11746 if (min > sahf_cost)
11748 if (min > fcomi_cost)
11753 /* Return true if we should use an FCOMI instruction for this
11757 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11759 enum rtx_code swapped_code = swap_condition (code);
11761 return ((ix86_fp_comparison_cost (code)
11762 == ix86_fp_comparison_fcomi_cost (code))
11763 || (ix86_fp_comparison_cost (swapped_code)
11764 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11767 /* Swap, force into registers, or otherwise massage the two operands
11768 to a fp comparison. The operands are updated in place; the new
11769 comparison code is returned. */
11771 static enum rtx_code
11772 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11774 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11775 rtx op0 = *pop0, op1 = *pop1;
11776 enum machine_mode op_mode = GET_MODE (op0);
11777 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11779 /* All of the unordered compare instructions only work on registers.
11780 The same is true of the fcomi compare instructions. The XFmode
11781 compare instructions require registers except when comparing
11782 against zero or when converting operand 1 from fixed point to
11786 && (fpcmp_mode == CCFPUmode
11787 || (op_mode == XFmode
11788 && ! (standard_80387_constant_p (op0) == 1
11789 || standard_80387_constant_p (op1) == 1)
11790 && GET_CODE (op1) != FLOAT)
11791 || ix86_use_fcomi_compare (code)))
11793 op0 = force_reg (op_mode, op0);
11794 op1 = force_reg (op_mode, op1);
11798 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11799 things around if they appear profitable, otherwise force op0
11800 into a register. */
11802 if (standard_80387_constant_p (op0) == 0
11804 && ! (standard_80387_constant_p (op1) == 0
11808 tmp = op0, op0 = op1, op1 = tmp;
11809 code = swap_condition (code);
11813 op0 = force_reg (op_mode, op0);
11815 if (CONSTANT_P (op1))
11817 int tmp = standard_80387_constant_p (op1);
11819 op1 = validize_mem (force_const_mem (op_mode, op1));
11823 op1 = force_reg (op_mode, op1);
11826 op1 = force_reg (op_mode, op1);
11830 /* Try to rearrange the comparison to make it cheaper. */
11831 if (ix86_fp_comparison_cost (code)
11832 > ix86_fp_comparison_cost (swap_condition (code))
11833 && (REG_P (op1) || can_create_pseudo_p ()))
11836 tmp = op0, op0 = op1, op1 = tmp;
11837 code = swap_condition (code);
11839 op0 = force_reg (op_mode, op0);
11847 /* Convert comparison codes we use to represent FP comparison to integer
11848 code that will result in proper branch. Return UNKNOWN if no such code
11852 ix86_fp_compare_code_to_integer (enum rtx_code code)
11881 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11884 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11885 rtx *second_test, rtx *bypass_test)
11887 enum machine_mode fpcmp_mode, intcmp_mode;
11889 int cost = ix86_fp_comparison_cost (code);
11890 enum rtx_code bypass_code, first_code, second_code;
11892 fpcmp_mode = ix86_fp_compare_mode (code);
11893 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11896 *second_test = NULL_RTX;
11898 *bypass_test = NULL_RTX;
11900 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11902 /* Do fcomi/sahf based test when profitable. */
11903 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11904 && (bypass_code == UNKNOWN || bypass_test)
11905 && (second_code == UNKNOWN || second_test))
11907 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11908 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11914 gcc_assert (TARGET_SAHF);
11917 scratch = gen_reg_rtx (HImode);
11918 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11920 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11923 /* The FP codes work out to act like unsigned. */
11924 intcmp_mode = fpcmp_mode;
11926 if (bypass_code != UNKNOWN)
11927 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11928 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11930 if (second_code != UNKNOWN)
11931 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11932 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11937 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11938 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11939 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11941 scratch = gen_reg_rtx (HImode);
11942 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11944 /* In the unordered case, we have to check C2 for NaN's, which
11945 doesn't happen to work out to anything nice combination-wise.
11946 So do some bit twiddling on the value we've got in AH to come
11947 up with an appropriate set of condition codes. */
11949 intcmp_mode = CCNOmode;
11954 if (code == GT || !TARGET_IEEE_FP)
11956 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11961 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11962 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11963 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11964 intcmp_mode = CCmode;
11970 if (code == LT && TARGET_IEEE_FP)
11972 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11973 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11974 intcmp_mode = CCmode;
11979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11985 if (code == GE || !TARGET_IEEE_FP)
11987 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11992 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11993 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12000 if (code == LE && TARGET_IEEE_FP)
12002 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12003 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12004 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12005 intcmp_mode = CCmode;
12010 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12016 if (code == EQ && TARGET_IEEE_FP)
12018 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12019 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12020 intcmp_mode = CCmode;
12025 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12032 if (code == NE && TARGET_IEEE_FP)
12034 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12035 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12047 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12051 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12056 gcc_unreachable ();
12060 /* Return the test that should be put into the flags user, i.e.
12061 the bcc, scc, or cmov instruction. */
12062 return gen_rtx_fmt_ee (code, VOIDmode,
12063 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12068 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12071 op0 = ix86_compare_op0;
12072 op1 = ix86_compare_op1;
12075 *second_test = NULL_RTX;
12077 *bypass_test = NULL_RTX;
12079 if (ix86_compare_emitted)
12081 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12082 ix86_compare_emitted = NULL_RTX;
12084 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12086 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12087 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12088 second_test, bypass_test);
12091 ret = ix86_expand_int_compare (code, op0, op1);
12096 /* Return true if the CODE will result in nontrivial jump sequence. */
12098 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12100 enum rtx_code bypass_code, first_code, second_code;
12103 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12104 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12108 ix86_expand_branch (enum rtx_code code, rtx label)
12112 /* If we have emitted a compare insn, go straight to simple.
12113 ix86_expand_compare won't emit anything if ix86_compare_emitted
12115 if (ix86_compare_emitted)
12118 switch (GET_MODE (ix86_compare_op0))
12124 tmp = ix86_expand_compare (code, NULL, NULL);
12125 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12126 gen_rtx_LABEL_REF (VOIDmode, label),
12128 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12137 enum rtx_code bypass_code, first_code, second_code;
12139 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12140 &ix86_compare_op1);
12142 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12144 /* Check whether we will use the natural sequence with one jump. If
12145 so, we can expand jump early. Otherwise delay expansion by
12146 creating compound insn to not confuse optimizers. */
12147 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12149 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12150 gen_rtx_LABEL_REF (VOIDmode, label),
12151 pc_rtx, NULL_RTX, NULL_RTX);
12155 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12156 ix86_compare_op0, ix86_compare_op1);
12157 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12158 gen_rtx_LABEL_REF (VOIDmode, label),
12160 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12162 use_fcomi = ix86_use_fcomi_compare (code);
12163 vec = rtvec_alloc (3 + !use_fcomi);
12164 RTVEC_ELT (vec, 0) = tmp;
12166 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12168 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12171 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12173 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12182 /* Expand DImode branch into multiple compare+branch. */
12184 rtx lo[2], hi[2], label2;
12185 enum rtx_code code1, code2, code3;
12186 enum machine_mode submode;
12188 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12190 tmp = ix86_compare_op0;
12191 ix86_compare_op0 = ix86_compare_op1;
12192 ix86_compare_op1 = tmp;
12193 code = swap_condition (code);
12195 if (GET_MODE (ix86_compare_op0) == DImode)
12197 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12198 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12203 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12204 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12208 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12209 avoid two branches. This costs one extra insn, so disable when
12210 optimizing for size. */
12212 if ((code == EQ || code == NE)
12214 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12219 if (hi[1] != const0_rtx)
12220 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12221 NULL_RTX, 0, OPTAB_WIDEN);
12224 if (lo[1] != const0_rtx)
12225 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12226 NULL_RTX, 0, OPTAB_WIDEN);
12228 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12229 NULL_RTX, 0, OPTAB_WIDEN);
12231 ix86_compare_op0 = tmp;
12232 ix86_compare_op1 = const0_rtx;
12233 ix86_expand_branch (code, label);
12237 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12238 op1 is a constant and the low word is zero, then we can just
12239 examine the high word. Similarly for low word -1 and
12240 less-or-equal-than or greater-than. */
12242 if (CONST_INT_P (hi[1]))
12245 case LT: case LTU: case GE: case GEU:
12246 if (lo[1] == const0_rtx)
12248 ix86_compare_op0 = hi[0];
12249 ix86_compare_op1 = hi[1];
12250 ix86_expand_branch (code, label);
12254 case LE: case LEU: case GT: case GTU:
12255 if (lo[1] == constm1_rtx)
12257 ix86_compare_op0 = hi[0];
12258 ix86_compare_op1 = hi[1];
12259 ix86_expand_branch (code, label);
12267 /* Otherwise, we need two or three jumps. */
12269 label2 = gen_label_rtx ();
12272 code2 = swap_condition (code);
12273 code3 = unsigned_condition (code);
12277 case LT: case GT: case LTU: case GTU:
12280 case LE: code1 = LT; code2 = GT; break;
12281 case GE: code1 = GT; code2 = LT; break;
12282 case LEU: code1 = LTU; code2 = GTU; break;
12283 case GEU: code1 = GTU; code2 = LTU; break;
12285 case EQ: code1 = UNKNOWN; code2 = NE; break;
12286 case NE: code2 = UNKNOWN; break;
12289 gcc_unreachable ();
12294 * if (hi(a) < hi(b)) goto true;
12295 * if (hi(a) > hi(b)) goto false;
12296 * if (lo(a) < lo(b)) goto true;
12300 ix86_compare_op0 = hi[0];
12301 ix86_compare_op1 = hi[1];
12303 if (code1 != UNKNOWN)
12304 ix86_expand_branch (code1, label);
12305 if (code2 != UNKNOWN)
12306 ix86_expand_branch (code2, label2);
12308 ix86_compare_op0 = lo[0];
12309 ix86_compare_op1 = lo[1];
12310 ix86_expand_branch (code3, label);
12312 if (code2 != UNKNOWN)
12313 emit_label (label2);
12318 gcc_unreachable ();
12322 /* Split branch based on floating point condition. */
12324 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12325 rtx target1, rtx target2, rtx tmp, rtx pushed)
12327 rtx second, bypass;
12328 rtx label = NULL_RTX;
12330 int bypass_probability = -1, second_probability = -1, probability = -1;
12333 if (target2 != pc_rtx)
12336 code = reverse_condition_maybe_unordered (code);
12341 condition = ix86_expand_fp_compare (code, op1, op2,
12342 tmp, &second, &bypass);
12344 /* Remove pushed operand from stack. */
12346 ix86_free_from_memory (GET_MODE (pushed));
12348 if (split_branch_probability >= 0)
12350 /* Distribute the probabilities across the jumps.
12351 Assume the BYPASS and SECOND to be always test
12353 probability = split_branch_probability;
12355 /* Value of 1 is low enough to make no need for probability
12356 to be updated. Later we may run some experiments and see
12357 if unordered values are more frequent in practice. */
12359 bypass_probability = 1;
12361 second_probability = 1;
12363 if (bypass != NULL_RTX)
12365 label = gen_label_rtx ();
12366 i = emit_jump_insn (gen_rtx_SET
12368 gen_rtx_IF_THEN_ELSE (VOIDmode,
12370 gen_rtx_LABEL_REF (VOIDmode,
12373 if (bypass_probability >= 0)
12375 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12376 GEN_INT (bypass_probability),
12379 i = emit_jump_insn (gen_rtx_SET
12381 gen_rtx_IF_THEN_ELSE (VOIDmode,
12382 condition, target1, target2)));
12383 if (probability >= 0)
12385 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12386 GEN_INT (probability),
12388 if (second != NULL_RTX)
12390 i = emit_jump_insn (gen_rtx_SET
12392 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12394 if (second_probability >= 0)
12396 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12397 GEN_INT (second_probability),
12400 if (label != NULL_RTX)
12401 emit_label (label);
12405 ix86_expand_setcc (enum rtx_code code, rtx dest)
12407 rtx ret, tmp, tmpreg, equiv;
12408 rtx second_test, bypass_test;
12410 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12411 return 0; /* FAIL */
12413 gcc_assert (GET_MODE (dest) == QImode);
12415 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12416 PUT_MODE (ret, QImode);
12421 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12422 if (bypass_test || second_test)
12424 rtx test = second_test;
12426 rtx tmp2 = gen_reg_rtx (QImode);
12429 gcc_assert (!second_test);
12430 test = bypass_test;
12432 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12434 PUT_MODE (test, QImode);
12435 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12438 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12440 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12443 /* Attach a REG_EQUAL note describing the comparison result. */
12444 if (ix86_compare_op0 && ix86_compare_op1)
12446 equiv = simplify_gen_relational (code, QImode,
12447 GET_MODE (ix86_compare_op0),
12448 ix86_compare_op0, ix86_compare_op1);
12449 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12452 return 1; /* DONE */
12455 /* Expand comparison setting or clearing carry flag. Return true when
12456 successful and set pop for the operation. */
12458 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12460 enum machine_mode mode =
12461 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12463 /* Do not handle DImode compares that go through special path. */
12464 if (mode == (TARGET_64BIT ? TImode : DImode))
12467 if (SCALAR_FLOAT_MODE_P (mode))
12469 rtx second_test = NULL, bypass_test = NULL;
12470 rtx compare_op, compare_seq;
12472 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12474 /* Shortcut: following common codes never translate
12475 into carry flag compares. */
12476 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12477 || code == ORDERED || code == UNORDERED)
12480 /* These comparisons require zero flag; swap operands so they won't. */
12481 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12482 && !TARGET_IEEE_FP)
12487 code = swap_condition (code);
12490 /* Try to expand the comparison and verify that we end up with
12491 carry flag based comparison. This fails to be true only when
12492 we decide to expand comparison using arithmetic that is not
12493 too common scenario. */
12495 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12496 &second_test, &bypass_test);
12497 compare_seq = get_insns ();
12500 if (second_test || bypass_test)
12503 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12504 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12505 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12507 code = GET_CODE (compare_op);
12509 if (code != LTU && code != GEU)
12512 emit_insn (compare_seq);
12517 if (!INTEGRAL_MODE_P (mode))
12526 /* Convert a==0 into (unsigned)a<1. */
12529 if (op1 != const0_rtx)
12532 code = (code == EQ ? LTU : GEU);
12535 /* Convert a>b into b<a or a>=b-1. */
12538 if (CONST_INT_P (op1))
12540 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12541 /* Bail out on overflow. We still can swap operands but that
12542 would force loading of the constant into register. */
12543 if (op1 == const0_rtx
12544 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12546 code = (code == GTU ? GEU : LTU);
12553 code = (code == GTU ? LTU : GEU);
12557 /* Convert a>=0 into (unsigned)a<0x80000000. */
12560 if (mode == DImode || op1 != const0_rtx)
12562 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12563 code = (code == LT ? GEU : LTU);
12567 if (mode == DImode || op1 != constm1_rtx)
12569 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12570 code = (code == LE ? GEU : LTU);
12576 /* Swapping operands may cause constant to appear as first operand. */
12577 if (!nonimmediate_operand (op0, VOIDmode))
12579 if (!can_create_pseudo_p ())
12581 op0 = force_reg (mode, op0);
12583 ix86_compare_op0 = op0;
12584 ix86_compare_op1 = op1;
12585 *pop = ix86_expand_compare (code, NULL, NULL);
12586 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12591 ix86_expand_int_movcc (rtx operands[])
12593 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12594 rtx compare_seq, compare_op;
12595 rtx second_test, bypass_test;
12596 enum machine_mode mode = GET_MODE (operands[0]);
12597 bool sign_bit_compare_p = false;;
12600 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12601 compare_seq = get_insns ();
12604 compare_code = GET_CODE (compare_op);
12606 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12607 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12608 sign_bit_compare_p = true;
12610 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12611 HImode insns, we'd be swallowed in word prefix ops. */
12613 if ((mode != HImode || TARGET_FAST_PREFIX)
12614 && (mode != (TARGET_64BIT ? TImode : DImode))
12615 && CONST_INT_P (operands[2])
12616 && CONST_INT_P (operands[3]))
12618 rtx out = operands[0];
12619 HOST_WIDE_INT ct = INTVAL (operands[2]);
12620 HOST_WIDE_INT cf = INTVAL (operands[3]);
12621 HOST_WIDE_INT diff;
12624 /* Sign bit compares are better done using shifts than we do by using
12626 if (sign_bit_compare_p
12627 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12628 ix86_compare_op1, &compare_op))
12630 /* Detect overlap between destination and compare sources. */
12633 if (!sign_bit_compare_p)
12635 bool fpcmp = false;
12637 compare_code = GET_CODE (compare_op);
12639 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12640 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12643 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12646 /* To simplify rest of code, restrict to the GEU case. */
12647 if (compare_code == LTU)
12649 HOST_WIDE_INT tmp = ct;
12652 compare_code = reverse_condition (compare_code);
12653 code = reverse_condition (code);
12658 PUT_CODE (compare_op,
12659 reverse_condition_maybe_unordered
12660 (GET_CODE (compare_op)));
12662 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12666 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12667 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12668 tmp = gen_reg_rtx (mode);
12670 if (mode == DImode)
12671 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12673 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12677 if (code == GT || code == GE)
12678 code = reverse_condition (code);
12681 HOST_WIDE_INT tmp = ct;
12686 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12687 ix86_compare_op1, VOIDmode, 0, -1);
12700 tmp = expand_simple_binop (mode, PLUS,
12702 copy_rtx (tmp), 1, OPTAB_DIRECT);
12713 tmp = expand_simple_binop (mode, IOR,
12715 copy_rtx (tmp), 1, OPTAB_DIRECT);
12717 else if (diff == -1 && ct)
12727 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12729 tmp = expand_simple_binop (mode, PLUS,
12730 copy_rtx (tmp), GEN_INT (cf),
12731 copy_rtx (tmp), 1, OPTAB_DIRECT);
12739 * andl cf - ct, dest
12749 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12752 tmp = expand_simple_binop (mode, AND,
12754 gen_int_mode (cf - ct, mode),
12755 copy_rtx (tmp), 1, OPTAB_DIRECT);
12757 tmp = expand_simple_binop (mode, PLUS,
12758 copy_rtx (tmp), GEN_INT (ct),
12759 copy_rtx (tmp), 1, OPTAB_DIRECT);
12762 if (!rtx_equal_p (tmp, out))
12763 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12765 return 1; /* DONE */
12770 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12773 tmp = ct, ct = cf, cf = tmp;
12776 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12778 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12780 /* We may be reversing unordered compare to normal compare, that
12781 is not valid in general (we may convert non-trapping condition
12782 to trapping one), however on i386 we currently emit all
12783 comparisons unordered. */
12784 compare_code = reverse_condition_maybe_unordered (compare_code);
12785 code = reverse_condition_maybe_unordered (code);
12789 compare_code = reverse_condition (compare_code);
12790 code = reverse_condition (code);
12794 compare_code = UNKNOWN;
12795 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12796 && CONST_INT_P (ix86_compare_op1))
12798 if (ix86_compare_op1 == const0_rtx
12799 && (code == LT || code == GE))
12800 compare_code = code;
12801 else if (ix86_compare_op1 == constm1_rtx)
12805 else if (code == GT)
12810 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12811 if (compare_code != UNKNOWN
12812 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12813 && (cf == -1 || ct == -1))
12815 /* If lea code below could be used, only optimize
12816 if it results in a 2 insn sequence. */
12818 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12819 || diff == 3 || diff == 5 || diff == 9)
12820 || (compare_code == LT && ct == -1)
12821 || (compare_code == GE && cf == -1))
12824 * notl op1 (if necessary)
12832 code = reverse_condition (code);
12835 out = emit_store_flag (out, code, ix86_compare_op0,
12836 ix86_compare_op1, VOIDmode, 0, -1);
12838 out = expand_simple_binop (mode, IOR,
12840 out, 1, OPTAB_DIRECT);
12841 if (out != operands[0])
12842 emit_move_insn (operands[0], out);
12844 return 1; /* DONE */
12849 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12850 || diff == 3 || diff == 5 || diff == 9)
12851 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12853 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12859 * lea cf(dest*(ct-cf)),dest
12863 * This also catches the degenerate setcc-only case.
12869 out = emit_store_flag (out, code, ix86_compare_op0,
12870 ix86_compare_op1, VOIDmode, 0, 1);
12873 /* On x86_64 the lea instruction operates on Pmode, so we need
12874 to get arithmetics done in proper mode to match. */
12876 tmp = copy_rtx (out);
12880 out1 = copy_rtx (out);
12881 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12885 tmp = gen_rtx_PLUS (mode, tmp, out1);
12891 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12894 if (!rtx_equal_p (tmp, out))
12897 out = force_operand (tmp, copy_rtx (out));
12899 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12901 if (!rtx_equal_p (out, operands[0]))
12902 emit_move_insn (operands[0], copy_rtx (out));
12904 return 1; /* DONE */
12908 * General case: Jumpful:
12909 * xorl dest,dest cmpl op1, op2
12910 * cmpl op1, op2 movl ct, dest
12911 * setcc dest jcc 1f
12912 * decl dest movl cf, dest
12913 * andl (cf-ct),dest 1:
12916 * Size 20. Size 14.
12918 * This is reasonably steep, but branch mispredict costs are
12919 * high on modern cpus, so consider failing only if optimizing
12923 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12924 && BRANCH_COST >= 2)
12928 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12933 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12935 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12937 /* We may be reversing unordered compare to normal compare,
12938 that is not valid in general (we may convert non-trapping
12939 condition to trapping one), however on i386 we currently
12940 emit all comparisons unordered. */
12941 code = reverse_condition_maybe_unordered (code);
12945 code = reverse_condition (code);
12946 if (compare_code != UNKNOWN)
12947 compare_code = reverse_condition (compare_code);
12951 if (compare_code != UNKNOWN)
12953 /* notl op1 (if needed)
12958 For x < 0 (resp. x <= -1) there will be no notl,
12959 so if possible swap the constants to get rid of the
12961 True/false will be -1/0 while code below (store flag
12962 followed by decrement) is 0/-1, so the constants need
12963 to be exchanged once more. */
12965 if (compare_code == GE || !cf)
12967 code = reverse_condition (code);
12972 HOST_WIDE_INT tmp = cf;
12977 out = emit_store_flag (out, code, ix86_compare_op0,
12978 ix86_compare_op1, VOIDmode, 0, -1);
12982 out = emit_store_flag (out, code, ix86_compare_op0,
12983 ix86_compare_op1, VOIDmode, 0, 1);
12985 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12986 copy_rtx (out), 1, OPTAB_DIRECT);
12989 out = expand_simple_binop (mode, AND, copy_rtx (out),
12990 gen_int_mode (cf - ct, mode),
12991 copy_rtx (out), 1, OPTAB_DIRECT);
12993 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12994 copy_rtx (out), 1, OPTAB_DIRECT);
12995 if (!rtx_equal_p (out, operands[0]))
12996 emit_move_insn (operands[0], copy_rtx (out));
12998 return 1; /* DONE */
13002 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13004 /* Try a few things more with specific constants and a variable. */
13007 rtx var, orig_out, out, tmp;
13009 if (BRANCH_COST <= 2)
13010 return 0; /* FAIL */
13012 /* If one of the two operands is an interesting constant, load a
13013 constant with the above and mask it in with a logical operation. */
13015 if (CONST_INT_P (operands[2]))
13018 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13019 operands[3] = constm1_rtx, op = and_optab;
13020 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13021 operands[3] = const0_rtx, op = ior_optab;
13023 return 0; /* FAIL */
13025 else if (CONST_INT_P (operands[3]))
13028 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13029 operands[2] = constm1_rtx, op = and_optab;
13030 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13031 operands[2] = const0_rtx, op = ior_optab;
13033 return 0; /* FAIL */
13036 return 0; /* FAIL */
13038 orig_out = operands[0];
13039 tmp = gen_reg_rtx (mode);
13042 /* Recurse to get the constant loaded. */
13043 if (ix86_expand_int_movcc (operands) == 0)
13044 return 0; /* FAIL */
13046 /* Mask in the interesting variable. */
13047 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13049 if (!rtx_equal_p (out, orig_out))
13050 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13052 return 1; /* DONE */
13056 * For comparison with above,
13066 if (! nonimmediate_operand (operands[2], mode))
13067 operands[2] = force_reg (mode, operands[2]);
13068 if (! nonimmediate_operand (operands[3], mode))
13069 operands[3] = force_reg (mode, operands[3]);
13071 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13073 rtx tmp = gen_reg_rtx (mode);
13074 emit_move_insn (tmp, operands[3]);
13077 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13079 rtx tmp = gen_reg_rtx (mode);
13080 emit_move_insn (tmp, operands[2]);
13084 if (! register_operand (operands[2], VOIDmode)
13086 || ! register_operand (operands[3], VOIDmode)))
13087 operands[2] = force_reg (mode, operands[2]);
13090 && ! register_operand (operands[3], VOIDmode))
13091 operands[3] = force_reg (mode, operands[3]);
13093 emit_insn (compare_seq);
13094 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13095 gen_rtx_IF_THEN_ELSE (mode,
13096 compare_op, operands[2],
13099 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13100 gen_rtx_IF_THEN_ELSE (mode,
13102 copy_rtx (operands[3]),
13103 copy_rtx (operands[0]))));
13105 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13106 gen_rtx_IF_THEN_ELSE (mode,
13108 copy_rtx (operands[2]),
13109 copy_rtx (operands[0]))));
13111 return 1; /* DONE */
13114 /* Swap, force into registers, or otherwise massage the two operands
13115 to an sse comparison with a mask result. Thus we differ a bit from
13116 ix86_prepare_fp_compare_args which expects to produce a flags result.
13118 The DEST operand exists to help determine whether to commute commutative
13119 operators. The POP0/POP1 operands are updated in place. The new
13120 comparison code is returned, or UNKNOWN if not implementable. */
13122 static enum rtx_code
13123 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13124 rtx *pop0, rtx *pop1)
13132 /* We have no LTGT as an operator. We could implement it with
13133 NE & ORDERED, but this requires an extra temporary. It's
13134 not clear that it's worth it. */
13141 /* These are supported directly. */
13148 /* For commutative operators, try to canonicalize the destination
13149 operand to be first in the comparison - this helps reload to
13150 avoid extra moves. */
13151 if (!dest || !rtx_equal_p (dest, *pop1))
13159 /* These are not supported directly. Swap the comparison operands
13160 to transform into something that is supported. */
13164 code = swap_condition (code);
13168 gcc_unreachable ();
13174 /* Detect conditional moves that exactly match min/max operational
13175 semantics. Note that this is IEEE safe, as long as we don't
13176 interchange the operands.
13178 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13179 and TRUE if the operation is successful and instructions are emitted. */
13182 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13183 rtx cmp_op1, rtx if_true, rtx if_false)
13185 enum machine_mode mode;
13191 else if (code == UNGE)
13194 if_true = if_false;
13200 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13202 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13207 mode = GET_MODE (dest);
13209 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13210 but MODE may be a vector mode and thus not appropriate. */
13211 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13213 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13216 if_true = force_reg (mode, if_true);
13217 v = gen_rtvec (2, if_true, if_false);
13218 tmp = gen_rtx_UNSPEC (mode, v, u);
13222 code = is_min ? SMIN : SMAX;
13223 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13226 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13230 /* Expand an sse vector comparison. Return the register with the result. */
13233 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13234 rtx op_true, rtx op_false)
13236 enum machine_mode mode = GET_MODE (dest);
13239 cmp_op0 = force_reg (mode, cmp_op0);
13240 if (!nonimmediate_operand (cmp_op1, mode))
13241 cmp_op1 = force_reg (mode, cmp_op1);
13244 || reg_overlap_mentioned_p (dest, op_true)
13245 || reg_overlap_mentioned_p (dest, op_false))
13246 dest = gen_reg_rtx (mode);
13248 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13249 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13254 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13255 operations. This is used for both scalar and vector conditional moves. */
13258 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13260 enum machine_mode mode = GET_MODE (dest);
13265 rtx pcmov = gen_rtx_SET (mode, dest,
13266 gen_rtx_IF_THEN_ELSE (mode, cmp,
13271 else if (op_false == CONST0_RTX (mode))
13273 op_true = force_reg (mode, op_true);
13274 x = gen_rtx_AND (mode, cmp, op_true);
13275 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13277 else if (op_true == CONST0_RTX (mode))
13279 op_false = force_reg (mode, op_false);
13280 x = gen_rtx_NOT (mode, cmp);
13281 x = gen_rtx_AND (mode, x, op_false);
13282 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13286 op_true = force_reg (mode, op_true);
13287 op_false = force_reg (mode, op_false);
13289 t2 = gen_reg_rtx (mode);
13291 t3 = gen_reg_rtx (mode);
13295 x = gen_rtx_AND (mode, op_true, cmp);
13296 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13298 x = gen_rtx_NOT (mode, cmp);
13299 x = gen_rtx_AND (mode, x, op_false);
13300 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13302 x = gen_rtx_IOR (mode, t3, t2);
13303 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13307 /* Expand a floating-point conditional move. Return true if successful. */
13310 ix86_expand_fp_movcc (rtx operands[])
13312 enum machine_mode mode = GET_MODE (operands[0]);
13313 enum rtx_code code = GET_CODE (operands[1]);
13314 rtx tmp, compare_op, second_test, bypass_test;
13316 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13318 enum machine_mode cmode;
13320 /* Since we've no cmove for sse registers, don't force bad register
13321 allocation just to gain access to it. Deny movcc when the
13322 comparison mode doesn't match the move mode. */
13323 cmode = GET_MODE (ix86_compare_op0);
13324 if (cmode == VOIDmode)
13325 cmode = GET_MODE (ix86_compare_op1);
13329 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13331 &ix86_compare_op1);
13332 if (code == UNKNOWN)
13335 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13336 ix86_compare_op1, operands[2],
13340 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13341 ix86_compare_op1, operands[2], operands[3]);
13342 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13346 /* The floating point conditional move instructions don't directly
13347 support conditions resulting from a signed integer comparison. */
13349 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13351 /* The floating point conditional move instructions don't directly
13352 support signed integer comparisons. */
13354 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13356 gcc_assert (!second_test && !bypass_test);
13357 tmp = gen_reg_rtx (QImode);
13358 ix86_expand_setcc (code, tmp);
13360 ix86_compare_op0 = tmp;
13361 ix86_compare_op1 = const0_rtx;
13362 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13364 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13366 tmp = gen_reg_rtx (mode);
13367 emit_move_insn (tmp, operands[3]);
13370 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13372 tmp = gen_reg_rtx (mode);
13373 emit_move_insn (tmp, operands[2]);
13377 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13378 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13379 operands[2], operands[3])));
13381 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13382 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13383 operands[3], operands[0])));
13385 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13386 gen_rtx_IF_THEN_ELSE (mode, second_test,
13387 operands[2], operands[0])));
13392 /* Expand a floating-point vector conditional move; a vcond operation
13393 rather than a movcc operation. */
13396 ix86_expand_fp_vcond (rtx operands[])
13398 enum rtx_code code = GET_CODE (operands[3]);
13401 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13402 &operands[4], &operands[5]);
13403 if (code == UNKNOWN)
13406 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13407 operands[5], operands[1], operands[2]))
13410 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13411 operands[1], operands[2]);
13412 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13416 /* Expand a signed/unsigned integral vector conditional move. */
13419 ix86_expand_int_vcond (rtx operands[])
13421 enum machine_mode mode = GET_MODE (operands[0]);
13422 enum rtx_code code = GET_CODE (operands[3]);
13423 bool negate = false;
13426 cop0 = operands[4];
13427 cop1 = operands[5];
13429 /* Canonicalize the comparison to EQ, GT, GTU. */
13440 code = reverse_condition (code);
13446 code = reverse_condition (code);
13452 code = swap_condition (code);
13453 x = cop0, cop0 = cop1, cop1 = x;
13457 gcc_unreachable ();
13460 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13461 if (mode == V2DImode)
13466 /* SSE4.1 supports EQ. */
13467 if (!TARGET_SSE4_1)
13473 /* SSE4.2 supports GT/GTU. */
13474 if (!TARGET_SSE4_2)
13479 gcc_unreachable ();
13483 /* Unsigned parallel compare is not supported by the hardware. Play some
13484 tricks to turn this into a signed comparison against 0. */
13487 cop0 = force_reg (mode, cop0);
13496 /* Perform a parallel modulo subtraction. */
13497 t1 = gen_reg_rtx (mode);
13498 emit_insn ((mode == V4SImode
13500 : gen_subv2di3) (t1, cop0, cop1));
13502 /* Extract the original sign bit of op0. */
13503 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13505 t2 = gen_reg_rtx (mode);
13506 emit_insn ((mode == V4SImode
13508 : gen_andv2di3) (t2, cop0, mask));
13510 /* XOR it back into the result of the subtraction. This results
13511 in the sign bit set iff we saw unsigned underflow. */
13512 x = gen_reg_rtx (mode);
13513 emit_insn ((mode == V4SImode
13515 : gen_xorv2di3) (x, t1, t2));
13523 /* Perform a parallel unsigned saturating subtraction. */
13524 x = gen_reg_rtx (mode);
13525 emit_insn (gen_rtx_SET (VOIDmode, x,
13526 gen_rtx_US_MINUS (mode, cop0, cop1)));
13533 gcc_unreachable ();
13537 cop1 = CONST0_RTX (mode);
13540 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13541 operands[1+negate], operands[2-negate]);
13543 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13544 operands[2-negate]);
13548 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13549 true if we should do zero extension, else sign extension. HIGH_P is
13550 true if we want the N/2 high elements, else the low elements. */
13553 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13555 enum machine_mode imode = GET_MODE (operands[1]);
13556 rtx (*unpack)(rtx, rtx, rtx);
13563 unpack = gen_vec_interleave_highv16qi;
13565 unpack = gen_vec_interleave_lowv16qi;
13569 unpack = gen_vec_interleave_highv8hi;
13571 unpack = gen_vec_interleave_lowv8hi;
13575 unpack = gen_vec_interleave_highv4si;
13577 unpack = gen_vec_interleave_lowv4si;
13580 gcc_unreachable ();
13583 dest = gen_lowpart (imode, operands[0]);
13586 se = force_reg (imode, CONST0_RTX (imode));
13588 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13589 operands[1], pc_rtx, pc_rtx);
13591 emit_insn (unpack (dest, operands[1], se));
13594 /* This function performs the same task as ix86_expand_sse_unpack,
13595 but with SSE4.1 instructions. */
13598 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13600 enum machine_mode imode = GET_MODE (operands[1]);
13601 rtx (*unpack)(rtx, rtx);
13608 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13610 unpack = gen_sse4_1_extendv8qiv8hi2;
13614 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13616 unpack = gen_sse4_1_extendv4hiv4si2;
13620 unpack = gen_sse4_1_zero_extendv2siv2di2;
13622 unpack = gen_sse4_1_extendv2siv2di2;
13625 gcc_unreachable ();
13628 dest = operands[0];
13631 /* Shift higher 8 bytes to lower 8 bytes. */
13632 src = gen_reg_rtx (imode);
13633 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13634 gen_lowpart (TImode, operands[1]),
13640 emit_insn (unpack (dest, src));
13643 /* This function performs the same task as ix86_expand_sse_unpack,
13644 but with amdfam15 instructions. */
13646 #define PPERM_SRC 0x00 /* copy source */
13647 #define PPERM_INVERT 0x20 /* invert source */
13648 #define PPERM_REVERSE 0x40 /* bit reverse source */
13649 #define PPERM_REV_INV 0x60 /* bit reverse & invert src */
13650 #define PPERM_ZERO 0x80 /* all 0's */
13651 #define PPERM_ONES 0xa0 /* all 1's */
13652 #define PPERM_SIGN 0xc0 /* propagate sign bit */
13653 #define PPERM_INV_SIGN 0xe0 /* invert & propagate sign */
13655 #define PPERM_SRC1 0x00 /* use first source byte */
13656 #define PPERM_SRC2 0x10 /* use second source byte */
13659 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13661 enum machine_mode imode = GET_MODE (operands[1]);
13662 int pperm_bytes[16];
13664 int h = (high_p) ? 8 : 0;
13667 rtvec v = rtvec_alloc (16);
13670 rtx op0 = operands[0], op1 = operands[1];
13675 vs = rtvec_alloc (8);
13676 h2 = (high_p) ? 8 : 0;
13677 for (i = 0; i < 8; i++)
13679 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13680 pperm_bytes[2*i+1] = ((unsigned_p)
13682 : PPERM_SIGN | PPERM_SRC2 | i | h);
13685 for (i = 0; i < 16; i++)
13686 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13688 for (i = 0; i < 8; i++)
13689 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13691 p = gen_rtx_PARALLEL (VOIDmode, vs);
13692 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13694 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13696 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
13700 vs = rtvec_alloc (4);
13701 h2 = (high_p) ? 4 : 0;
13702 for (i = 0; i < 4; i++)
13704 sign_extend = ((unsigned_p)
13706 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13707 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13708 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13709 pperm_bytes[4*i+2] = sign_extend;
13710 pperm_bytes[4*i+3] = sign_extend;
13713 for (i = 0; i < 16; i++)
13714 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13716 for (i = 0; i < 4; i++)
13717 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13719 p = gen_rtx_PARALLEL (VOIDmode, vs);
13720 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13722 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13724 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
13728 vs = rtvec_alloc (2);
13729 h2 = (high_p) ? 2 : 0;
13730 for (i = 0; i < 2; i++)
13732 sign_extend = ((unsigned_p)
13734 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13735 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13736 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13737 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13738 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13739 pperm_bytes[8*i+4] = sign_extend;
13740 pperm_bytes[8*i+5] = sign_extend;
13741 pperm_bytes[8*i+6] = sign_extend;
13742 pperm_bytes[8*i+7] = sign_extend;
13745 for (i = 0; i < 16; i++)
13746 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13748 for (i = 0; i < 2; i++)
13749 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13751 p = gen_rtx_PARALLEL (VOIDmode, vs);
13752 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13754 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13756 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13760 gcc_unreachable ();
13766 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13767 next narrower integer vector type */
13769 ix86_expand_sse5_pack (rtx operands[3])
13771 enum machine_mode imode = GET_MODE (operands[0]);
13772 int pperm_bytes[16];
13774 rtvec v = rtvec_alloc (16);
13776 rtx op0 = operands[0];
13777 rtx op1 = operands[1];
13778 rtx op2 = operands[2];
13783 for (i = 0; i < 8; i++)
13785 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13786 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13789 for (i = 0; i < 16; i++)
13790 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13792 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13793 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
13797 for (i = 0; i < 4; i++)
13799 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13800 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13801 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13802 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13805 for (i = 0; i < 16; i++)
13806 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13808 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13809 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
13813 for (i = 0; i < 2; i++)
13815 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13816 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13817 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13818 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13819 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13820 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13821 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13822 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13825 for (i = 0; i < 16; i++)
13826 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13828 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13829 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
13833 gcc_unreachable ();
13839 /* Expand conditional increment or decrement using adb/sbb instructions.
13840 The default case using setcc followed by the conditional move can be
13841 done by generic code. */
13843 ix86_expand_int_addcc (rtx operands[])
13845 enum rtx_code code = GET_CODE (operands[1]);
13847 rtx val = const0_rtx;
13848 bool fpcmp = false;
13849 enum machine_mode mode = GET_MODE (operands[0]);
13851 if (operands[3] != const1_rtx
13852 && operands[3] != constm1_rtx)
13854 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13855 ix86_compare_op1, &compare_op))
13857 code = GET_CODE (compare_op);
13859 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13860 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13863 code = ix86_fp_compare_code_to_integer (code);
13870 PUT_CODE (compare_op,
13871 reverse_condition_maybe_unordered
13872 (GET_CODE (compare_op)));
13874 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13876 PUT_MODE (compare_op, mode);
13878 /* Construct either adc or sbb insn. */
13879 if ((code == LTU) == (operands[3] == constm1_rtx))
13881 switch (GET_MODE (operands[0]))
13884 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13887 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13890 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13893 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13896 gcc_unreachable ();
13901 switch (GET_MODE (operands[0]))
13904 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13907 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13910 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13913 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13916 gcc_unreachable ();
13919 return 1; /* DONE */
13923 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13924 works for floating pointer parameters and nonoffsetable memories.
13925 For pushes, it returns just stack offsets; the values will be saved
13926 in the right order. Maximally three parts are generated. */
13929 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13934 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13936 size = (GET_MODE_SIZE (mode) + 4) / 8;
13938 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13939 gcc_assert (size >= 2 && size <= 3);
13941 /* Optimize constant pool reference to immediates. This is used by fp
13942 moves, that force all constants to memory to allow combining. */
13943 if (MEM_P (operand) && MEM_READONLY_P (operand))
13945 rtx tmp = maybe_get_pool_constant (operand);
13950 if (MEM_P (operand) && !offsettable_memref_p (operand))
13952 /* The only non-offsetable memories we handle are pushes. */
13953 int ok = push_operand (operand, VOIDmode);
13957 operand = copy_rtx (operand);
13958 PUT_MODE (operand, Pmode);
13959 parts[0] = parts[1] = parts[2] = operand;
13963 if (GET_CODE (operand) == CONST_VECTOR)
13965 enum machine_mode imode = int_mode_for_mode (mode);
13966 /* Caution: if we looked through a constant pool memory above,
13967 the operand may actually have a different mode now. That's
13968 ok, since we want to pun this all the way back to an integer. */
13969 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13970 gcc_assert (operand != NULL);
13976 if (mode == DImode)
13977 split_di (&operand, 1, &parts[0], &parts[1]);
13980 if (REG_P (operand))
13982 gcc_assert (reload_completed);
13983 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13984 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13986 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13988 else if (offsettable_memref_p (operand))
13990 operand = adjust_address (operand, SImode, 0);
13991 parts[0] = operand;
13992 parts[1] = adjust_address (operand, SImode, 4);
13994 parts[2] = adjust_address (operand, SImode, 8);
13996 else if (GET_CODE (operand) == CONST_DOUBLE)
14001 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14005 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14006 parts[2] = gen_int_mode (l[2], SImode);
14009 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14012 gcc_unreachable ();
14014 parts[1] = gen_int_mode (l[1], SImode);
14015 parts[0] = gen_int_mode (l[0], SImode);
14018 gcc_unreachable ();
14023 if (mode == TImode)
14024 split_ti (&operand, 1, &parts[0], &parts[1]);
14025 if (mode == XFmode || mode == TFmode)
14027 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14028 if (REG_P (operand))
14030 gcc_assert (reload_completed);
14031 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14032 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14034 else if (offsettable_memref_p (operand))
14036 operand = adjust_address (operand, DImode, 0);
14037 parts[0] = operand;
14038 parts[1] = adjust_address (operand, upper_mode, 8);
14040 else if (GET_CODE (operand) == CONST_DOUBLE)
14045 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14046 real_to_target (l, &r, mode);
14048 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14049 if (HOST_BITS_PER_WIDE_INT >= 64)
14052 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14053 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14056 parts[0] = immed_double_const (l[0], l[1], DImode);
14058 if (upper_mode == SImode)
14059 parts[1] = gen_int_mode (l[2], SImode);
14060 else if (HOST_BITS_PER_WIDE_INT >= 64)
14063 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14064 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14067 parts[1] = immed_double_const (l[2], l[3], DImode);
14070 gcc_unreachable ();
14077 /* Emit insns to perform a move or push of DI, DF, and XF values.
14078 Return false when normal moves are needed; true when all required
14079 insns have been emitted. Operands 2-4 contain the input values
14080 int the correct order; operands 5-7 contain the output values. */
14083 ix86_split_long_move (rtx operands[])
14088 int collisions = 0;
14089 enum machine_mode mode = GET_MODE (operands[0]);
14091 /* The DFmode expanders may ask us to move double.
14092 For 64bit target this is single move. By hiding the fact
14093 here we simplify i386.md splitters. */
14094 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14096 /* Optimize constant pool reference to immediates. This is used by
14097 fp moves, that force all constants to memory to allow combining. */
14099 if (MEM_P (operands[1])
14100 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14101 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14102 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14103 if (push_operand (operands[0], VOIDmode))
14105 operands[0] = copy_rtx (operands[0]);
14106 PUT_MODE (operands[0], Pmode);
14109 operands[0] = gen_lowpart (DImode, operands[0]);
14110 operands[1] = gen_lowpart (DImode, operands[1]);
14111 emit_move_insn (operands[0], operands[1]);
14115 /* The only non-offsettable memory we handle is push. */
14116 if (push_operand (operands[0], VOIDmode))
14119 gcc_assert (!MEM_P (operands[0])
14120 || offsettable_memref_p (operands[0]));
14122 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14123 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14125 /* When emitting push, take care for source operands on the stack. */
14126 if (push && MEM_P (operands[1])
14127 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14130 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14131 XEXP (part[1][2], 0));
14132 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14133 XEXP (part[1][1], 0));
14136 /* We need to do copy in the right order in case an address register
14137 of the source overlaps the destination. */
14138 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14140 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14142 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14145 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14148 /* Collision in the middle part can be handled by reordering. */
14149 if (collisions == 1 && nparts == 3
14150 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14153 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14154 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14157 /* If there are more collisions, we can't handle it by reordering.
14158 Do an lea to the last part and use only one colliding move. */
14159 else if (collisions > 1)
14165 base = part[0][nparts - 1];
14167 /* Handle the case when the last part isn't valid for lea.
14168 Happens in 64-bit mode storing the 12-byte XFmode. */
14169 if (GET_MODE (base) != Pmode)
14170 base = gen_rtx_REG (Pmode, REGNO (base));
14172 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14173 part[1][0] = replace_equiv_address (part[1][0], base);
14174 part[1][1] = replace_equiv_address (part[1][1],
14175 plus_constant (base, UNITS_PER_WORD));
14177 part[1][2] = replace_equiv_address (part[1][2],
14178 plus_constant (base, 8));
14188 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14189 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14190 emit_move_insn (part[0][2], part[1][2]);
14195 /* In 64bit mode we don't have 32bit push available. In case this is
14196 register, it is OK - we will just use larger counterpart. We also
14197 retype memory - these comes from attempt to avoid REX prefix on
14198 moving of second half of TFmode value. */
14199 if (GET_MODE (part[1][1]) == SImode)
14201 switch (GET_CODE (part[1][1]))
14204 part[1][1] = adjust_address (part[1][1], DImode, 0);
14208 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14212 gcc_unreachable ();
14215 if (GET_MODE (part[1][0]) == SImode)
14216 part[1][0] = part[1][1];
14219 emit_move_insn (part[0][1], part[1][1]);
14220 emit_move_insn (part[0][0], part[1][0]);
14224 /* Choose correct order to not overwrite the source before it is copied. */
14225 if ((REG_P (part[0][0])
14226 && REG_P (part[1][1])
14227 && (REGNO (part[0][0]) == REGNO (part[1][1])
14229 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14231 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
14235 operands[2] = part[0][2];
14236 operands[3] = part[0][1];
14237 operands[4] = part[0][0];
14238 operands[5] = part[1][2];
14239 operands[6] = part[1][1];
14240 operands[7] = part[1][0];
14244 operands[2] = part[0][1];
14245 operands[3] = part[0][0];
14246 operands[5] = part[1][1];
14247 operands[6] = part[1][0];
14254 operands[2] = part[0][0];
14255 operands[3] = part[0][1];
14256 operands[4] = part[0][2];
14257 operands[5] = part[1][0];
14258 operands[6] = part[1][1];
14259 operands[7] = part[1][2];
14263 operands[2] = part[0][0];
14264 operands[3] = part[0][1];
14265 operands[5] = part[1][0];
14266 operands[6] = part[1][1];
14270 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14273 if (CONST_INT_P (operands[5])
14274 && operands[5] != const0_rtx
14275 && REG_P (operands[2]))
14277 if (CONST_INT_P (operands[6])
14278 && INTVAL (operands[6]) == INTVAL (operands[5]))
14279 operands[6] = operands[2];
14282 && CONST_INT_P (operands[7])
14283 && INTVAL (operands[7]) == INTVAL (operands[5]))
14284 operands[7] = operands[2];
14288 && CONST_INT_P (operands[6])
14289 && operands[6] != const0_rtx
14290 && REG_P (operands[3])
14291 && CONST_INT_P (operands[7])
14292 && INTVAL (operands[7]) == INTVAL (operands[6]))
14293 operands[7] = operands[3];
14296 emit_move_insn (operands[2], operands[5]);
14297 emit_move_insn (operands[3], operands[6]);
14299 emit_move_insn (operands[4], operands[7]);
14304 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14305 left shift by a constant, either using a single shift or
14306 a sequence of add instructions. */
14309 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14313 emit_insn ((mode == DImode
14315 : gen_adddi3) (operand, operand, operand));
14317 else if (!optimize_size
14318 && count * ix86_cost->add <= ix86_cost->shift_const)
14321 for (i=0; i<count; i++)
14323 emit_insn ((mode == DImode
14325 : gen_adddi3) (operand, operand, operand));
14329 emit_insn ((mode == DImode
14331 : gen_ashldi3) (operand, operand, GEN_INT (count)));
14335 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14337 rtx low[2], high[2];
14339 const int single_width = mode == DImode ? 32 : 64;
14341 if (CONST_INT_P (operands[2]))
14343 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14344 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14346 if (count >= single_width)
14348 emit_move_insn (high[0], low[1]);
14349 emit_move_insn (low[0], const0_rtx);
14351 if (count > single_width)
14352 ix86_expand_ashl_const (high[0], count - single_width, mode);
14356 if (!rtx_equal_p (operands[0], operands[1]))
14357 emit_move_insn (operands[0], operands[1]);
14358 emit_insn ((mode == DImode
14360 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14361 ix86_expand_ashl_const (low[0], count, mode);
14366 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14368 if (operands[1] == const1_rtx)
14370 /* Assuming we've chosen a QImode capable registers, then 1 << N
14371 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14372 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14374 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14376 ix86_expand_clear (low[0]);
14377 ix86_expand_clear (high[0]);
14378 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
14380 d = gen_lowpart (QImode, low[0]);
14381 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14382 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14383 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14385 d = gen_lowpart (QImode, high[0]);
14386 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14387 s = gen_rtx_NE (QImode, flags, const0_rtx);
14388 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14391 /* Otherwise, we can get the same results by manually performing
14392 a bit extract operation on bit 5/6, and then performing the two
14393 shifts. The two methods of getting 0/1 into low/high are exactly
14394 the same size. Avoiding the shift in the bit extract case helps
14395 pentium4 a bit; no one else seems to care much either way. */
14400 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14401 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14403 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14404 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14406 emit_insn ((mode == DImode
14408 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14409 emit_insn ((mode == DImode
14411 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14412 emit_move_insn (low[0], high[0]);
14413 emit_insn ((mode == DImode
14415 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14418 emit_insn ((mode == DImode
14420 : gen_ashldi3) (low[0], low[0], operands[2]));
14421 emit_insn ((mode == DImode
14423 : gen_ashldi3) (high[0], high[0], operands[2]));
14427 if (operands[1] == constm1_rtx)
14429 /* For -1 << N, we can avoid the shld instruction, because we
14430 know that we're shifting 0...31/63 ones into a -1. */
14431 emit_move_insn (low[0], constm1_rtx);
14433 emit_move_insn (high[0], low[0]);
14435 emit_move_insn (high[0], constm1_rtx);
14439 if (!rtx_equal_p (operands[0], operands[1]))
14440 emit_move_insn (operands[0], operands[1]);
14442 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14443 emit_insn ((mode == DImode
14445 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14448 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14450 if (TARGET_CMOVE && scratch)
14452 ix86_expand_clear (scratch);
14453 emit_insn ((mode == DImode
14454 ? gen_x86_shift_adj_1
14455 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14458 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
14462 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14464 rtx low[2], high[2];
14466 const int single_width = mode == DImode ? 32 : 64;
14468 if (CONST_INT_P (operands[2]))
14470 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14471 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14473 if (count == single_width * 2 - 1)
14475 emit_move_insn (high[0], high[1]);
14476 emit_insn ((mode == DImode
14478 : gen_ashrdi3) (high[0], high[0],
14479 GEN_INT (single_width - 1)));
14480 emit_move_insn (low[0], high[0]);
14483 else if (count >= single_width)
14485 emit_move_insn (low[0], high[1]);
14486 emit_move_insn (high[0], low[0]);
14487 emit_insn ((mode == DImode
14489 : gen_ashrdi3) (high[0], high[0],
14490 GEN_INT (single_width - 1)));
14491 if (count > single_width)
14492 emit_insn ((mode == DImode
14494 : gen_ashrdi3) (low[0], low[0],
14495 GEN_INT (count - single_width)));
14499 if (!rtx_equal_p (operands[0], operands[1]))
14500 emit_move_insn (operands[0], operands[1]);
14501 emit_insn ((mode == DImode
14503 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14504 emit_insn ((mode == DImode
14506 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
14511 if (!rtx_equal_p (operands[0], operands[1]))
14512 emit_move_insn (operands[0], operands[1]);
14514 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14516 emit_insn ((mode == DImode
14518 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14519 emit_insn ((mode == DImode
14521 : gen_ashrdi3) (high[0], high[0], operands[2]));
14523 if (TARGET_CMOVE && scratch)
14525 emit_move_insn (scratch, high[0]);
14526 emit_insn ((mode == DImode
14528 : gen_ashrdi3) (scratch, scratch,
14529 GEN_INT (single_width - 1)));
14530 emit_insn ((mode == DImode
14531 ? gen_x86_shift_adj_1
14532 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14536 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
14541 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14543 rtx low[2], high[2];
14545 const int single_width = mode == DImode ? 32 : 64;
14547 if (CONST_INT_P (operands[2]))
14549 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14550 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14552 if (count >= single_width)
14554 emit_move_insn (low[0], high[1]);
14555 ix86_expand_clear (high[0]);
14557 if (count > single_width)
14558 emit_insn ((mode == DImode
14560 : gen_lshrdi3) (low[0], low[0],
14561 GEN_INT (count - single_width)));
14565 if (!rtx_equal_p (operands[0], operands[1]))
14566 emit_move_insn (operands[0], operands[1]);
14567 emit_insn ((mode == DImode
14569 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14570 emit_insn ((mode == DImode
14572 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
14577 if (!rtx_equal_p (operands[0], operands[1]))
14578 emit_move_insn (operands[0], operands[1]);
14580 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14582 emit_insn ((mode == DImode
14584 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14585 emit_insn ((mode == DImode
14587 : gen_lshrdi3) (high[0], high[0], operands[2]));
14589 /* Heh. By reversing the arguments, we can reuse this pattern. */
14590 if (TARGET_CMOVE && scratch)
14592 ix86_expand_clear (scratch);
14593 emit_insn ((mode == DImode
14594 ? gen_x86_shift_adj_1
14595 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14599 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14603 /* Predict just emitted jump instruction to be taken with probability PROB. */
14605 predict_jump (int prob)
14607 rtx insn = get_last_insn ();
14608 gcc_assert (JUMP_P (insn));
14610 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14615 /* Helper function for the string operations below. Dest VARIABLE whether
14616 it is aligned to VALUE bytes. If true, jump to the label. */
14618 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14620 rtx label = gen_label_rtx ();
14621 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14622 if (GET_MODE (variable) == DImode)
14623 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14625 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14626 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14629 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14631 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14635 /* Adjust COUNTER by the VALUE. */
14637 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14639 if (GET_MODE (countreg) == DImode)
14640 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14642 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14645 /* Zero extend possibly SImode EXP to Pmode register. */
14647 ix86_zero_extend_to_Pmode (rtx exp)
14650 if (GET_MODE (exp) == VOIDmode)
14651 return force_reg (Pmode, exp);
14652 if (GET_MODE (exp) == Pmode)
14653 return copy_to_mode_reg (Pmode, exp);
14654 r = gen_reg_rtx (Pmode);
14655 emit_insn (gen_zero_extendsidi2 (r, exp));
14659 /* Divide COUNTREG by SCALE. */
14661 scale_counter (rtx countreg, int scale)
14664 rtx piece_size_mask;
14668 if (CONST_INT_P (countreg))
14669 return GEN_INT (INTVAL (countreg) / scale);
14670 gcc_assert (REG_P (countreg));
14672 piece_size_mask = GEN_INT (scale - 1);
14673 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14674 GEN_INT (exact_log2 (scale)),
14675 NULL, 1, OPTAB_DIRECT);
14679 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14680 DImode for constant loop counts. */
14682 static enum machine_mode
14683 counter_mode (rtx count_exp)
14685 if (GET_MODE (count_exp) != VOIDmode)
14686 return GET_MODE (count_exp);
14687 if (GET_CODE (count_exp) != CONST_INT)
14689 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14694 /* When SRCPTR is non-NULL, output simple loop to move memory
14695 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14696 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14697 equivalent loop to set memory by VALUE (supposed to be in MODE).
14699 The size is rounded down to whole number of chunk size moved at once.
14700 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14704 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14705 rtx destptr, rtx srcptr, rtx value,
14706 rtx count, enum machine_mode mode, int unroll,
14709 rtx out_label, top_label, iter, tmp;
14710 enum machine_mode iter_mode = counter_mode (count);
14711 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14712 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14718 top_label = gen_label_rtx ();
14719 out_label = gen_label_rtx ();
14720 iter = gen_reg_rtx (iter_mode);
14722 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14723 NULL, 1, OPTAB_DIRECT);
14724 /* Those two should combine. */
14725 if (piece_size == const1_rtx)
14727 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14729 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14731 emit_move_insn (iter, const0_rtx);
14733 emit_label (top_label);
14735 tmp = convert_modes (Pmode, iter_mode, iter, true);
14736 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14737 destmem = change_address (destmem, mode, x_addr);
14741 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14742 srcmem = change_address (srcmem, mode, y_addr);
14744 /* When unrolling for chips that reorder memory reads and writes,
14745 we can save registers by using single temporary.
14746 Also using 4 temporaries is overkill in 32bit mode. */
14747 if (!TARGET_64BIT && 0)
14749 for (i = 0; i < unroll; i++)
14754 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14756 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14758 emit_move_insn (destmem, srcmem);
14764 gcc_assert (unroll <= 4);
14765 for (i = 0; i < unroll; i++)
14767 tmpreg[i] = gen_reg_rtx (mode);
14771 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14773 emit_move_insn (tmpreg[i], srcmem);
14775 for (i = 0; i < unroll; i++)
14780 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14782 emit_move_insn (destmem, tmpreg[i]);
14787 for (i = 0; i < unroll; i++)
14791 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14792 emit_move_insn (destmem, value);
14795 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14796 true, OPTAB_LIB_WIDEN);
14798 emit_move_insn (iter, tmp);
14800 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14802 if (expected_size != -1)
14804 expected_size /= GET_MODE_SIZE (mode) * unroll;
14805 if (expected_size == 0)
14807 else if (expected_size > REG_BR_PROB_BASE)
14808 predict_jump (REG_BR_PROB_BASE - 1);
14810 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14813 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14814 iter = ix86_zero_extend_to_Pmode (iter);
14815 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14816 true, OPTAB_LIB_WIDEN);
14817 if (tmp != destptr)
14818 emit_move_insn (destptr, tmp);
14821 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14822 true, OPTAB_LIB_WIDEN);
14824 emit_move_insn (srcptr, tmp);
14826 emit_label (out_label);
14829 /* Output "rep; mov" instruction.
14830 Arguments have same meaning as for previous function */
14832 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14833 rtx destptr, rtx srcptr,
14835 enum machine_mode mode)
14841 /* If the size is known, it is shorter to use rep movs. */
14842 if (mode == QImode && CONST_INT_P (count)
14843 && !(INTVAL (count) & 3))
14846 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14847 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14848 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14849 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14850 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14851 if (mode != QImode)
14853 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14854 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14855 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14856 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14857 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14858 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14862 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14863 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14865 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14869 /* Output "rep; stos" instruction.
14870 Arguments have same meaning as for previous function */
14872 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14874 enum machine_mode mode)
14879 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14880 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14881 value = force_reg (mode, gen_lowpart (mode, value));
14882 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14883 if (mode != QImode)
14885 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14886 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14887 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14890 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14891 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14895 emit_strmov (rtx destmem, rtx srcmem,
14896 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14898 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14899 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14900 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14903 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14905 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14906 rtx destptr, rtx srcptr, rtx count, int max_size)
14909 if (CONST_INT_P (count))
14911 HOST_WIDE_INT countval = INTVAL (count);
14914 if ((countval & 0x10) && max_size > 16)
14918 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14919 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14922 gcc_unreachable ();
14925 if ((countval & 0x08) && max_size > 8)
14928 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14931 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14932 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14936 if ((countval & 0x04) && max_size > 4)
14938 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14941 if ((countval & 0x02) && max_size > 2)
14943 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14946 if ((countval & 0x01) && max_size > 1)
14948 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14955 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14956 count, 1, OPTAB_DIRECT);
14957 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14958 count, QImode, 1, 4);
14962 /* When there are stringops, we can cheaply increase dest and src pointers.
14963 Otherwise we save code size by maintaining offset (zero is readily
14964 available from preceding rep operation) and using x86 addressing modes.
14966 if (TARGET_SINGLE_STRINGOP)
14970 rtx label = ix86_expand_aligntest (count, 4, true);
14971 src = change_address (srcmem, SImode, srcptr);
14972 dest = change_address (destmem, SImode, destptr);
14973 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14974 emit_label (label);
14975 LABEL_NUSES (label) = 1;
14979 rtx label = ix86_expand_aligntest (count, 2, true);
14980 src = change_address (srcmem, HImode, srcptr);
14981 dest = change_address (destmem, HImode, destptr);
14982 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14983 emit_label (label);
14984 LABEL_NUSES (label) = 1;
14988 rtx label = ix86_expand_aligntest (count, 1, true);
14989 src = change_address (srcmem, QImode, srcptr);
14990 dest = change_address (destmem, QImode, destptr);
14991 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14992 emit_label (label);
14993 LABEL_NUSES (label) = 1;
14998 rtx offset = force_reg (Pmode, const0_rtx);
15003 rtx label = ix86_expand_aligntest (count, 4, true);
15004 src = change_address (srcmem, SImode, srcptr);
15005 dest = change_address (destmem, SImode, destptr);
15006 emit_move_insn (dest, src);
15007 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15008 true, OPTAB_LIB_WIDEN);
15010 emit_move_insn (offset, tmp);
15011 emit_label (label);
15012 LABEL_NUSES (label) = 1;
15016 rtx label = ix86_expand_aligntest (count, 2, true);
15017 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15018 src = change_address (srcmem, HImode, tmp);
15019 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15020 dest = change_address (destmem, HImode, tmp);
15021 emit_move_insn (dest, src);
15022 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15023 true, OPTAB_LIB_WIDEN);
15025 emit_move_insn (offset, tmp);
15026 emit_label (label);
15027 LABEL_NUSES (label) = 1;
15031 rtx label = ix86_expand_aligntest (count, 1, true);
15032 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15033 src = change_address (srcmem, QImode, tmp);
15034 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15035 dest = change_address (destmem, QImode, tmp);
15036 emit_move_insn (dest, src);
15037 emit_label (label);
15038 LABEL_NUSES (label) = 1;
15043 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15045 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15046 rtx count, int max_size)
15049 expand_simple_binop (counter_mode (count), AND, count,
15050 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15051 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15052 gen_lowpart (QImode, value), count, QImode,
15056 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15058 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15062 if (CONST_INT_P (count))
15064 HOST_WIDE_INT countval = INTVAL (count);
15067 if ((countval & 0x10) && max_size > 16)
15071 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15072 emit_insn (gen_strset (destptr, dest, value));
15073 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15074 emit_insn (gen_strset (destptr, dest, value));
15077 gcc_unreachable ();
15080 if ((countval & 0x08) && max_size > 8)
15084 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15085 emit_insn (gen_strset (destptr, dest, value));
15089 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15090 emit_insn (gen_strset (destptr, dest, value));
15091 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15092 emit_insn (gen_strset (destptr, dest, value));
15096 if ((countval & 0x04) && max_size > 4)
15098 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15099 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15102 if ((countval & 0x02) && max_size > 2)
15104 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15105 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15108 if ((countval & 0x01) && max_size > 1)
15110 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15111 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15118 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15123 rtx label = ix86_expand_aligntest (count, 16, true);
15126 dest = change_address (destmem, DImode, destptr);
15127 emit_insn (gen_strset (destptr, dest, value));
15128 emit_insn (gen_strset (destptr, dest, value));
15132 dest = change_address (destmem, SImode, destptr);
15133 emit_insn (gen_strset (destptr, dest, value));
15134 emit_insn (gen_strset (destptr, dest, value));
15135 emit_insn (gen_strset (destptr, dest, value));
15136 emit_insn (gen_strset (destptr, dest, value));
15138 emit_label (label);
15139 LABEL_NUSES (label) = 1;
15143 rtx label = ix86_expand_aligntest (count, 8, true);
15146 dest = change_address (destmem, DImode, destptr);
15147 emit_insn (gen_strset (destptr, dest, value));
15151 dest = change_address (destmem, SImode, destptr);
15152 emit_insn (gen_strset (destptr, dest, value));
15153 emit_insn (gen_strset (destptr, dest, value));
15155 emit_label (label);
15156 LABEL_NUSES (label) = 1;
15160 rtx label = ix86_expand_aligntest (count, 4, true);
15161 dest = change_address (destmem, SImode, destptr);
15162 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15163 emit_label (label);
15164 LABEL_NUSES (label) = 1;
15168 rtx label = ix86_expand_aligntest (count, 2, true);
15169 dest = change_address (destmem, HImode, destptr);
15170 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15171 emit_label (label);
15172 LABEL_NUSES (label) = 1;
15176 rtx label = ix86_expand_aligntest (count, 1, true);
15177 dest = change_address (destmem, QImode, destptr);
15178 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15179 emit_label (label);
15180 LABEL_NUSES (label) = 1;
15184 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15185 DESIRED_ALIGNMENT. */
15187 expand_movmem_prologue (rtx destmem, rtx srcmem,
15188 rtx destptr, rtx srcptr, rtx count,
15189 int align, int desired_alignment)
15191 if (align <= 1 && desired_alignment > 1)
15193 rtx label = ix86_expand_aligntest (destptr, 1, false);
15194 srcmem = change_address (srcmem, QImode, srcptr);
15195 destmem = change_address (destmem, QImode, destptr);
15196 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15197 ix86_adjust_counter (count, 1);
15198 emit_label (label);
15199 LABEL_NUSES (label) = 1;
15201 if (align <= 2 && desired_alignment > 2)
15203 rtx label = ix86_expand_aligntest (destptr, 2, false);
15204 srcmem = change_address (srcmem, HImode, srcptr);
15205 destmem = change_address (destmem, HImode, destptr);
15206 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15207 ix86_adjust_counter (count, 2);
15208 emit_label (label);
15209 LABEL_NUSES (label) = 1;
15211 if (align <= 4 && desired_alignment > 4)
15213 rtx label = ix86_expand_aligntest (destptr, 4, false);
15214 srcmem = change_address (srcmem, SImode, srcptr);
15215 destmem = change_address (destmem, SImode, destptr);
15216 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15217 ix86_adjust_counter (count, 4);
15218 emit_label (label);
15219 LABEL_NUSES (label) = 1;
15221 gcc_assert (desired_alignment <= 8);
15224 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15225 DESIRED_ALIGNMENT. */
15227 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15228 int align, int desired_alignment)
15230 if (align <= 1 && desired_alignment > 1)
15232 rtx label = ix86_expand_aligntest (destptr, 1, false);
15233 destmem = change_address (destmem, QImode, destptr);
15234 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15235 ix86_adjust_counter (count, 1);
15236 emit_label (label);
15237 LABEL_NUSES (label) = 1;
15239 if (align <= 2 && desired_alignment > 2)
15241 rtx label = ix86_expand_aligntest (destptr, 2, false);
15242 destmem = change_address (destmem, HImode, destptr);
15243 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15244 ix86_adjust_counter (count, 2);
15245 emit_label (label);
15246 LABEL_NUSES (label) = 1;
15248 if (align <= 4 && desired_alignment > 4)
15250 rtx label = ix86_expand_aligntest (destptr, 4, false);
15251 destmem = change_address (destmem, SImode, destptr);
15252 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15253 ix86_adjust_counter (count, 4);
15254 emit_label (label);
15255 LABEL_NUSES (label) = 1;
15257 gcc_assert (desired_alignment <= 8);
15260 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15261 static enum stringop_alg
15262 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15263 int *dynamic_check)
15265 const struct stringop_algs * algs;
15266 /* Algorithms using the rep prefix want at least edi and ecx;
15267 additionally, memset wants eax and memcpy wants esi. Don't
15268 consider such algorithms if the user has appropriated those
15269 registers for their own purposes. */
15270 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15272 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15274 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15275 || (alg != rep_prefix_1_byte \
15276 && alg != rep_prefix_4_byte \
15277 && alg != rep_prefix_8_byte))
15279 *dynamic_check = -1;
15281 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15283 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15284 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15285 return stringop_alg;
15286 /* rep; movq or rep; movl is the smallest variant. */
15287 else if (optimize_size)
15289 if (!count || (count & 3))
15290 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15292 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15294 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15296 else if (expected_size != -1 && expected_size < 4)
15297 return loop_1_byte;
15298 else if (expected_size != -1)
15301 enum stringop_alg alg = libcall;
15302 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15304 /* We get here if the algorithms that were not libcall-based
15305 were rep-prefix based and we are unable to use rep prefixes
15306 based on global register usage. Break out of the loop and
15307 use the heuristic below. */
15308 if (algs->size[i].max == 0)
15310 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15312 enum stringop_alg candidate = algs->size[i].alg;
15314 if (candidate != libcall && ALG_USABLE_P (candidate))
15316 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15317 last non-libcall inline algorithm. */
15318 if (TARGET_INLINE_ALL_STRINGOPS)
15320 /* When the current size is best to be copied by a libcall,
15321 but we are still forced to inline, run the heuristic below
15322 that will pick code for medium sized blocks. */
15323 if (alg != libcall)
15327 else if (ALG_USABLE_P (candidate))
15331 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15333 /* When asked to inline the call anyway, try to pick meaningful choice.
15334 We look for maximal size of block that is faster to copy by hand and
15335 take blocks of at most of that size guessing that average size will
15336 be roughly half of the block.
15338 If this turns out to be bad, we might simply specify the preferred
15339 choice in ix86_costs. */
15340 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15341 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15344 enum stringop_alg alg;
15346 bool any_alg_usable_p = true;
15348 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15350 enum stringop_alg candidate = algs->size[i].alg;
15351 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15353 if (candidate != libcall && candidate
15354 && ALG_USABLE_P (candidate))
15355 max = algs->size[i].max;
15357 /* If there aren't any usable algorithms, then recursing on
15358 smaller sizes isn't going to find anything. Just return the
15359 simple byte-at-a-time copy loop. */
15360 if (!any_alg_usable_p)
15362 /* Pick something reasonable. */
15363 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15364 *dynamic_check = 128;
15365 return loop_1_byte;
15369 alg = decide_alg (count, max / 2, memset, dynamic_check);
15370 gcc_assert (*dynamic_check == -1);
15371 gcc_assert (alg != libcall);
15372 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15373 *dynamic_check = max;
15376 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15377 #undef ALG_USABLE_P
15380 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15381 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15383 decide_alignment (int align,
15384 enum stringop_alg alg,
15387 int desired_align = 0;
15391 gcc_unreachable ();
15393 case unrolled_loop:
15394 desired_align = GET_MODE_SIZE (Pmode);
15396 case rep_prefix_8_byte:
15399 case rep_prefix_4_byte:
15400 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15401 copying whole cacheline at once. */
15402 if (TARGET_PENTIUMPRO)
15407 case rep_prefix_1_byte:
15408 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15409 copying whole cacheline at once. */
15410 if (TARGET_PENTIUMPRO)
15424 if (desired_align < align)
15425 desired_align = align;
15426 if (expected_size != -1 && expected_size < 4)
15427 desired_align = align;
15428 return desired_align;
15431 /* Return the smallest power of 2 greater than VAL. */
15433 smallest_pow2_greater_than (int val)
15441 /* Expand string move (memcpy) operation. Use i386 string operations when
15442 profitable. expand_setmem contains similar code. The code depends upon
15443 architecture, block size and alignment, but always has the same
15446 1) Prologue guard: Conditional that jumps up to epilogues for small
15447 blocks that can be handled by epilogue alone. This is faster but
15448 also needed for correctness, since prologue assume the block is larger
15449 than the desired alignment.
15451 Optional dynamic check for size and libcall for large
15452 blocks is emitted here too, with -minline-stringops-dynamically.
15454 2) Prologue: copy first few bytes in order to get destination aligned
15455 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15456 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15457 We emit either a jump tree on power of two sized blocks, or a byte loop.
15459 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15460 with specified algorithm.
15462 4) Epilogue: code copying tail of the block that is too small to be
15463 handled by main body (or up to size guarded by prologue guard). */
15466 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15467 rtx expected_align_exp, rtx expected_size_exp)
15473 rtx jump_around_label = NULL;
15474 HOST_WIDE_INT align = 1;
15475 unsigned HOST_WIDE_INT count = 0;
15476 HOST_WIDE_INT expected_size = -1;
15477 int size_needed = 0, epilogue_size_needed;
15478 int desired_align = 0;
15479 enum stringop_alg alg;
15482 if (CONST_INT_P (align_exp))
15483 align = INTVAL (align_exp);
15484 /* i386 can do misaligned access on reasonably increased cost. */
15485 if (CONST_INT_P (expected_align_exp)
15486 && INTVAL (expected_align_exp) > align)
15487 align = INTVAL (expected_align_exp);
15488 if (CONST_INT_P (count_exp))
15489 count = expected_size = INTVAL (count_exp);
15490 if (CONST_INT_P (expected_size_exp) && count == 0)
15491 expected_size = INTVAL (expected_size_exp);
15493 /* Make sure we don't need to care about overflow later on. */
15494 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15497 /* Step 0: Decide on preferred algorithm, desired alignment and
15498 size of chunks to be copied by main loop. */
15500 alg = decide_alg (count, expected_size, false, &dynamic_check);
15501 desired_align = decide_alignment (align, alg, expected_size);
15503 if (!TARGET_ALIGN_STRINGOPS)
15504 align = desired_align;
15506 if (alg == libcall)
15508 gcc_assert (alg != no_stringop);
15510 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15511 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15512 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15517 gcc_unreachable ();
15519 size_needed = GET_MODE_SIZE (Pmode);
15521 case unrolled_loop:
15522 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15524 case rep_prefix_8_byte:
15527 case rep_prefix_4_byte:
15530 case rep_prefix_1_byte:
15536 epilogue_size_needed = size_needed;
15538 /* Step 1: Prologue guard. */
15540 /* Alignment code needs count to be in register. */
15541 if (CONST_INT_P (count_exp) && desired_align > align)
15542 count_exp = force_reg (counter_mode (count_exp), count_exp);
15543 gcc_assert (desired_align >= 1 && align >= 1);
15545 /* Ensure that alignment prologue won't copy past end of block. */
15546 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15548 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15549 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15550 Make sure it is power of 2. */
15551 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15553 if (CONST_INT_P (count_exp))
15555 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15560 label = gen_label_rtx ();
15561 emit_cmp_and_jump_insns (count_exp,
15562 GEN_INT (epilogue_size_needed),
15563 LTU, 0, counter_mode (count_exp), 1, label);
15564 if (expected_size == -1 || expected_size < epilogue_size_needed)
15565 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15567 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15571 /* Emit code to decide on runtime whether library call or inline should be
15573 if (dynamic_check != -1)
15575 if (CONST_INT_P (count_exp))
15577 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15579 emit_block_move_via_libcall (dst, src, count_exp, false);
15580 count_exp = const0_rtx;
15586 rtx hot_label = gen_label_rtx ();
15587 jump_around_label = gen_label_rtx ();
15588 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15589 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15590 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15591 emit_block_move_via_libcall (dst, src, count_exp, false);
15592 emit_jump (jump_around_label);
15593 emit_label (hot_label);
15597 /* Step 2: Alignment prologue. */
15599 if (desired_align > align)
15601 /* Except for the first move in epilogue, we no longer know
15602 constant offset in aliasing info. It don't seems to worth
15603 the pain to maintain it for the first move, so throw away
15605 src = change_address (src, BLKmode, srcreg);
15606 dst = change_address (dst, BLKmode, destreg);
15607 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15610 if (label && size_needed == 1)
15612 emit_label (label);
15613 LABEL_NUSES (label) = 1;
15617 /* Step 3: Main loop. */
15623 gcc_unreachable ();
15625 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15626 count_exp, QImode, 1, expected_size);
15629 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15630 count_exp, Pmode, 1, expected_size);
15632 case unrolled_loop:
15633 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15634 registers for 4 temporaries anyway. */
15635 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15636 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15639 case rep_prefix_8_byte:
15640 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15643 case rep_prefix_4_byte:
15644 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15647 case rep_prefix_1_byte:
15648 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15652 /* Adjust properly the offset of src and dest memory for aliasing. */
15653 if (CONST_INT_P (count_exp))
15655 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15656 (count / size_needed) * size_needed);
15657 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15658 (count / size_needed) * size_needed);
15662 src = change_address (src, BLKmode, srcreg);
15663 dst = change_address (dst, BLKmode, destreg);
15666 /* Step 4: Epilogue to copy the remaining bytes. */
15670 /* When the main loop is done, COUNT_EXP might hold original count,
15671 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15672 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15673 bytes. Compensate if needed. */
15675 if (size_needed < epilogue_size_needed)
15678 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15679 GEN_INT (size_needed - 1), count_exp, 1,
15681 if (tmp != count_exp)
15682 emit_move_insn (count_exp, tmp);
15684 emit_label (label);
15685 LABEL_NUSES (label) = 1;
15688 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15689 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15690 epilogue_size_needed);
15691 if (jump_around_label)
15692 emit_label (jump_around_label);
15696 /* Helper function for memcpy. For QImode value 0xXY produce
15697 0xXYXYXYXY of wide specified by MODE. This is essentially
15698 a * 0x10101010, but we can do slightly better than
15699 synth_mult by unwinding the sequence by hand on CPUs with
15702 promote_duplicated_reg (enum machine_mode mode, rtx val)
15704 enum machine_mode valmode = GET_MODE (val);
15706 int nops = mode == DImode ? 3 : 2;
15708 gcc_assert (mode == SImode || mode == DImode);
15709 if (val == const0_rtx)
15710 return copy_to_mode_reg (mode, const0_rtx);
15711 if (CONST_INT_P (val))
15713 HOST_WIDE_INT v = INTVAL (val) & 255;
15717 if (mode == DImode)
15718 v |= (v << 16) << 16;
15719 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15722 if (valmode == VOIDmode)
15724 if (valmode != QImode)
15725 val = gen_lowpart (QImode, val);
15726 if (mode == QImode)
15728 if (!TARGET_PARTIAL_REG_STALL)
15730 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15731 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15732 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15733 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15735 rtx reg = convert_modes (mode, QImode, val, true);
15736 tmp = promote_duplicated_reg (mode, const1_rtx);
15737 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15742 rtx reg = convert_modes (mode, QImode, val, true);
15744 if (!TARGET_PARTIAL_REG_STALL)
15745 if (mode == SImode)
15746 emit_insn (gen_movsi_insv_1 (reg, reg));
15748 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15751 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15752 NULL, 1, OPTAB_DIRECT);
15754 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15756 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15757 NULL, 1, OPTAB_DIRECT);
15758 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15759 if (mode == SImode)
15761 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15762 NULL, 1, OPTAB_DIRECT);
15763 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15768 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15769 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15770 alignment from ALIGN to DESIRED_ALIGN. */
15772 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15777 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15778 promoted_val = promote_duplicated_reg (DImode, val);
15779 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15780 promoted_val = promote_duplicated_reg (SImode, val);
15781 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15782 promoted_val = promote_duplicated_reg (HImode, val);
15784 promoted_val = val;
15786 return promoted_val;
15789 /* Expand string clear operation (bzero). Use i386 string operations when
15790 profitable. See expand_movmem comment for explanation of individual
15791 steps performed. */
15793 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15794 rtx expected_align_exp, rtx expected_size_exp)
15799 rtx jump_around_label = NULL;
15800 HOST_WIDE_INT align = 1;
15801 unsigned HOST_WIDE_INT count = 0;
15802 HOST_WIDE_INT expected_size = -1;
15803 int size_needed = 0, epilogue_size_needed;
15804 int desired_align = 0;
15805 enum stringop_alg alg;
15806 rtx promoted_val = NULL;
15807 bool force_loopy_epilogue = false;
15810 if (CONST_INT_P (align_exp))
15811 align = INTVAL (align_exp);
15812 /* i386 can do misaligned access on reasonably increased cost. */
15813 if (CONST_INT_P (expected_align_exp)
15814 && INTVAL (expected_align_exp) > align)
15815 align = INTVAL (expected_align_exp);
15816 if (CONST_INT_P (count_exp))
15817 count = expected_size = INTVAL (count_exp);
15818 if (CONST_INT_P (expected_size_exp) && count == 0)
15819 expected_size = INTVAL (expected_size_exp);
15821 /* Make sure we don't need to care about overflow later on. */
15822 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15825 /* Step 0: Decide on preferred algorithm, desired alignment and
15826 size of chunks to be copied by main loop. */
15828 alg = decide_alg (count, expected_size, true, &dynamic_check);
15829 desired_align = decide_alignment (align, alg, expected_size);
15831 if (!TARGET_ALIGN_STRINGOPS)
15832 align = desired_align;
15834 if (alg == libcall)
15836 gcc_assert (alg != no_stringop);
15838 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15839 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15844 gcc_unreachable ();
15846 size_needed = GET_MODE_SIZE (Pmode);
15848 case unrolled_loop:
15849 size_needed = GET_MODE_SIZE (Pmode) * 4;
15851 case rep_prefix_8_byte:
15854 case rep_prefix_4_byte:
15857 case rep_prefix_1_byte:
15862 epilogue_size_needed = size_needed;
15864 /* Step 1: Prologue guard. */
15866 /* Alignment code needs count to be in register. */
15867 if (CONST_INT_P (count_exp) && desired_align > align)
15869 enum machine_mode mode = SImode;
15870 if (TARGET_64BIT && (count & ~0xffffffff))
15872 count_exp = force_reg (mode, count_exp);
15874 /* Do the cheap promotion to allow better CSE across the
15875 main loop and epilogue (ie one load of the big constant in the
15876 front of all code. */
15877 if (CONST_INT_P (val_exp))
15878 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15879 desired_align, align);
15880 /* Ensure that alignment prologue won't copy past end of block. */
15881 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15883 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15884 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15885 Make sure it is power of 2. */
15886 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15888 /* To improve performance of small blocks, we jump around the VAL
15889 promoting mode. This mean that if the promoted VAL is not constant,
15890 we might not use it in the epilogue and have to use byte
15892 if (epilogue_size_needed > 2 && !promoted_val)
15893 force_loopy_epilogue = true;
15894 label = gen_label_rtx ();
15895 emit_cmp_and_jump_insns (count_exp,
15896 GEN_INT (epilogue_size_needed),
15897 LTU, 0, counter_mode (count_exp), 1, label);
15898 if (GET_CODE (count_exp) == CONST_INT)
15900 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15901 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15903 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15905 if (dynamic_check != -1)
15907 rtx hot_label = gen_label_rtx ();
15908 jump_around_label = gen_label_rtx ();
15909 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15910 LEU, 0, counter_mode (count_exp), 1, hot_label);
15911 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15912 set_storage_via_libcall (dst, count_exp, val_exp, false);
15913 emit_jump (jump_around_label);
15914 emit_label (hot_label);
15917 /* Step 2: Alignment prologue. */
15919 /* Do the expensive promotion once we branched off the small blocks. */
15921 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15922 desired_align, align);
15923 gcc_assert (desired_align >= 1 && align >= 1);
15925 if (desired_align > align)
15927 /* Except for the first move in epilogue, we no longer know
15928 constant offset in aliasing info. It don't seems to worth
15929 the pain to maintain it for the first move, so throw away
15931 dst = change_address (dst, BLKmode, destreg);
15932 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15935 if (label && size_needed == 1)
15937 emit_label (label);
15938 LABEL_NUSES (label) = 1;
15942 /* Step 3: Main loop. */
15948 gcc_unreachable ();
15950 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15951 count_exp, QImode, 1, expected_size);
15954 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15955 count_exp, Pmode, 1, expected_size);
15957 case unrolled_loop:
15958 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15959 count_exp, Pmode, 4, expected_size);
15961 case rep_prefix_8_byte:
15962 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15965 case rep_prefix_4_byte:
15966 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15969 case rep_prefix_1_byte:
15970 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15974 /* Adjust properly the offset of src and dest memory for aliasing. */
15975 if (CONST_INT_P (count_exp))
15976 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15977 (count / size_needed) * size_needed);
15979 dst = change_address (dst, BLKmode, destreg);
15981 /* Step 4: Epilogue to copy the remaining bytes. */
15985 /* When the main loop is done, COUNT_EXP might hold original count,
15986 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15987 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15988 bytes. Compensate if needed. */
15990 if (size_needed < desired_align - align)
15993 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15994 GEN_INT (size_needed - 1), count_exp, 1,
15996 size_needed = desired_align - align + 1;
15997 if (tmp != count_exp)
15998 emit_move_insn (count_exp, tmp);
16000 emit_label (label);
16001 LABEL_NUSES (label) = 1;
16003 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16005 if (force_loopy_epilogue)
16006 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16009 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16012 if (jump_around_label)
16013 emit_label (jump_around_label);
16017 /* Expand the appropriate insns for doing strlen if not just doing
16020 out = result, initialized with the start address
16021 align_rtx = alignment of the address.
16022 scratch = scratch register, initialized with the startaddress when
16023 not aligned, otherwise undefined
16025 This is just the body. It needs the initializations mentioned above and
16026 some address computing at the end. These things are done in i386.md. */
16029 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16033 rtx align_2_label = NULL_RTX;
16034 rtx align_3_label = NULL_RTX;
16035 rtx align_4_label = gen_label_rtx ();
16036 rtx end_0_label = gen_label_rtx ();
16038 rtx tmpreg = gen_reg_rtx (SImode);
16039 rtx scratch = gen_reg_rtx (SImode);
16043 if (CONST_INT_P (align_rtx))
16044 align = INTVAL (align_rtx);
16046 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16048 /* Is there a known alignment and is it less than 4? */
16051 rtx scratch1 = gen_reg_rtx (Pmode);
16052 emit_move_insn (scratch1, out);
16053 /* Is there a known alignment and is it not 2? */
16056 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16057 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16059 /* Leave just the 3 lower bits. */
16060 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16061 NULL_RTX, 0, OPTAB_WIDEN);
16063 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16064 Pmode, 1, align_4_label);
16065 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16066 Pmode, 1, align_2_label);
16067 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16068 Pmode, 1, align_3_label);
16072 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16073 check if is aligned to 4 - byte. */
16075 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16076 NULL_RTX, 0, OPTAB_WIDEN);
16078 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16079 Pmode, 1, align_4_label);
16082 mem = change_address (src, QImode, out);
16084 /* Now compare the bytes. */
16086 /* Compare the first n unaligned byte on a byte per byte basis. */
16087 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16088 QImode, 1, end_0_label);
16090 /* Increment the address. */
16092 emit_insn (gen_adddi3 (out, out, const1_rtx));
16094 emit_insn (gen_addsi3 (out, out, const1_rtx));
16096 /* Not needed with an alignment of 2 */
16099 emit_label (align_2_label);
16101 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16105 emit_insn (gen_adddi3 (out, out, const1_rtx));
16107 emit_insn (gen_addsi3 (out, out, const1_rtx));
16109 emit_label (align_3_label);
16112 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16116 emit_insn (gen_adddi3 (out, out, const1_rtx));
16118 emit_insn (gen_addsi3 (out, out, const1_rtx));
16121 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16122 align this loop. It gives only huge programs, but does not help to
16124 emit_label (align_4_label);
16126 mem = change_address (src, SImode, out);
16127 emit_move_insn (scratch, mem);
16129 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16131 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16133 /* This formula yields a nonzero result iff one of the bytes is zero.
16134 This saves three branches inside loop and many cycles. */
16136 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16137 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16138 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16139 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16140 gen_int_mode (0x80808080, SImode)));
16141 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16146 rtx reg = gen_reg_rtx (SImode);
16147 rtx reg2 = gen_reg_rtx (Pmode);
16148 emit_move_insn (reg, tmpreg);
16149 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16151 /* If zero is not in the first two bytes, move two bytes forward. */
16152 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16153 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16154 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16155 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16156 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16159 /* Emit lea manually to avoid clobbering of flags. */
16160 emit_insn (gen_rtx_SET (SImode, reg2,
16161 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16163 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16164 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16165 emit_insn (gen_rtx_SET (VOIDmode, out,
16166 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16173 rtx end_2_label = gen_label_rtx ();
16174 /* Is zero in the first two bytes? */
16176 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16177 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16178 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16179 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16180 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16182 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16183 JUMP_LABEL (tmp) = end_2_label;
16185 /* Not in the first two. Move two bytes forward. */
16186 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16188 emit_insn (gen_adddi3 (out, out, const2_rtx));
16190 emit_insn (gen_addsi3 (out, out, const2_rtx));
16192 emit_label (end_2_label);
16196 /* Avoid branch in fixing the byte. */
16197 tmpreg = gen_lowpart (QImode, tmpreg);
16198 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16199 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16201 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16203 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16205 emit_label (end_0_label);
16208 /* Expand strlen. */
16211 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16213 rtx addr, scratch1, scratch2, scratch3, scratch4;
16215 /* The generic case of strlen expander is long. Avoid it's
16216 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16218 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16219 && !TARGET_INLINE_ALL_STRINGOPS
16221 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16224 addr = force_reg (Pmode, XEXP (src, 0));
16225 scratch1 = gen_reg_rtx (Pmode);
16227 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16230 /* Well it seems that some optimizer does not combine a call like
16231 foo(strlen(bar), strlen(bar));
16232 when the move and the subtraction is done here. It does calculate
16233 the length just once when these instructions are done inside of
16234 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16235 often used and I use one fewer register for the lifetime of
16236 output_strlen_unroll() this is better. */
16238 emit_move_insn (out, addr);
16240 ix86_expand_strlensi_unroll_1 (out, src, align);
16242 /* strlensi_unroll_1 returns the address of the zero at the end of
16243 the string, like memchr(), so compute the length by subtracting
16244 the start address. */
16246 emit_insn (gen_subdi3 (out, out, addr));
16248 emit_insn (gen_subsi3 (out, out, addr));
16254 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16255 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16258 scratch2 = gen_reg_rtx (Pmode);
16259 scratch3 = gen_reg_rtx (Pmode);
16260 scratch4 = force_reg (Pmode, constm1_rtx);
16262 emit_move_insn (scratch3, addr);
16263 eoschar = force_reg (QImode, eoschar);
16265 src = replace_equiv_address_nv (src, scratch3);
16267 /* If .md starts supporting :P, this can be done in .md. */
16268 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16269 scratch4), UNSPEC_SCAS);
16270 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
16273 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16274 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16278 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16279 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16285 /* For given symbol (function) construct code to compute address of it's PLT
16286 entry in large x86-64 PIC model. */
16288 construct_plt_address (rtx symbol)
16290 rtx tmp = gen_reg_rtx (Pmode);
16291 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16293 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16294 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16296 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16297 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
16302 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16303 rtx callarg2 ATTRIBUTE_UNUSED,
16304 rtx pop, int sibcall)
16306 rtx use = NULL, call;
16308 if (pop == const0_rtx)
16310 gcc_assert (!TARGET_64BIT || !pop);
16312 if (TARGET_MACHO && !TARGET_64BIT)
16315 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16316 fnaddr = machopic_indirect_call_target (fnaddr);
16321 /* Static functions and indirect calls don't need the pic register. */
16322 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16323 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16324 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16325 use_reg (&use, pic_offset_table_rtx);
16328 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16330 rtx al = gen_rtx_REG (QImode, AX_REG);
16331 emit_move_insn (al, callarg2);
16332 use_reg (&use, al);
16335 if (ix86_cmodel == CM_LARGE_PIC
16336 && GET_CODE (fnaddr) == MEM
16337 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16338 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16339 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16340 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16342 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16343 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16345 if (sibcall && TARGET_64BIT
16346 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16349 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16350 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16351 emit_move_insn (fnaddr, addr);
16352 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16355 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16357 call = gen_rtx_SET (VOIDmode, retval, call);
16360 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16361 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16362 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16365 call = emit_call_insn (call);
16367 CALL_INSN_FUNCTION_USAGE (call) = use;
16371 /* Clear stack slot assignments remembered from previous functions.
16372 This is called from INIT_EXPANDERS once before RTL is emitted for each
16375 static struct machine_function *
16376 ix86_init_machine_status (void)
16378 struct machine_function *f;
16380 f = GGC_CNEW (struct machine_function);
16381 f->use_fast_prologue_epilogue_nregs = -1;
16382 f->tls_descriptor_call_expanded_p = 0;
16387 /* Return a MEM corresponding to a stack slot with mode MODE.
16388 Allocate a new slot if necessary.
16390 The RTL for a function can have several slots available: N is
16391 which slot to use. */
16394 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16396 struct stack_local_entry *s;
16398 gcc_assert (n < MAX_386_STACK_LOCALS);
16400 /* Virtual slot is valid only before vregs are instantiated. */
16401 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
16403 for (s = ix86_stack_locals; s; s = s->next)
16404 if (s->mode == mode && s->n == n)
16405 return copy_rtx (s->rtl);
16407 s = (struct stack_local_entry *)
16408 ggc_alloc (sizeof (struct stack_local_entry));
16411 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16413 s->next = ix86_stack_locals;
16414 ix86_stack_locals = s;
16418 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16420 static GTY(()) rtx ix86_tls_symbol;
16422 ix86_tls_get_addr (void)
16425 if (!ix86_tls_symbol)
16427 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16428 (TARGET_ANY_GNU_TLS
16430 ? "___tls_get_addr"
16431 : "__tls_get_addr");
16434 return ix86_tls_symbol;
16437 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16439 static GTY(()) rtx ix86_tls_module_base_symbol;
16441 ix86_tls_module_base (void)
16444 if (!ix86_tls_module_base_symbol)
16446 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16447 "_TLS_MODULE_BASE_");
16448 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16449 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16452 return ix86_tls_module_base_symbol;
16455 /* Calculate the length of the memory address in the instruction
16456 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16459 memory_address_length (rtx addr)
16461 struct ix86_address parts;
16462 rtx base, index, disp;
16466 if (GET_CODE (addr) == PRE_DEC
16467 || GET_CODE (addr) == POST_INC
16468 || GET_CODE (addr) == PRE_MODIFY
16469 || GET_CODE (addr) == POST_MODIFY)
16472 ok = ix86_decompose_address (addr, &parts);
16475 if (parts.base && GET_CODE (parts.base) == SUBREG)
16476 parts.base = SUBREG_REG (parts.base);
16477 if (parts.index && GET_CODE (parts.index) == SUBREG)
16478 parts.index = SUBREG_REG (parts.index);
16481 index = parts.index;
16486 - esp as the base always wants an index,
16487 - ebp as the base always wants a displacement. */
16489 /* Register Indirect. */
16490 if (base && !index && !disp)
16492 /* esp (for its index) and ebp (for its displacement) need
16493 the two-byte modrm form. */
16494 if (addr == stack_pointer_rtx
16495 || addr == arg_pointer_rtx
16496 || addr == frame_pointer_rtx
16497 || addr == hard_frame_pointer_rtx)
16501 /* Direct Addressing. */
16502 else if (disp && !base && !index)
16507 /* Find the length of the displacement constant. */
16510 if (base && satisfies_constraint_K (disp))
16515 /* ebp always wants a displacement. */
16516 else if (base == hard_frame_pointer_rtx)
16519 /* An index requires the two-byte modrm form.... */
16521 /* ...like esp, which always wants an index. */
16522 || base == stack_pointer_rtx
16523 || base == arg_pointer_rtx
16524 || base == frame_pointer_rtx)
16531 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16532 is set, expect that insn have 8bit immediate alternative. */
16534 ix86_attr_length_immediate_default (rtx insn, int shortform)
16538 extract_insn_cached (insn);
16539 for (i = recog_data.n_operands - 1; i >= 0; --i)
16540 if (CONSTANT_P (recog_data.operand[i]))
16543 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16547 switch (get_attr_mode (insn))
16558 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16563 fatal_insn ("unknown insn mode", insn);
16569 /* Compute default value for "length_address" attribute. */
16571 ix86_attr_length_address_default (rtx insn)
16575 if (get_attr_type (insn) == TYPE_LEA)
16577 rtx set = PATTERN (insn);
16579 if (GET_CODE (set) == PARALLEL)
16580 set = XVECEXP (set, 0, 0);
16582 gcc_assert (GET_CODE (set) == SET);
16584 return memory_address_length (SET_SRC (set));
16587 extract_insn_cached (insn);
16588 for (i = recog_data.n_operands - 1; i >= 0; --i)
16589 if (MEM_P (recog_data.operand[i]))
16591 return memory_address_length (XEXP (recog_data.operand[i], 0));
16597 /* Return the maximum number of instructions a cpu can issue. */
16600 ix86_issue_rate (void)
16604 case PROCESSOR_PENTIUM:
16608 case PROCESSOR_PENTIUMPRO:
16609 case PROCESSOR_PENTIUM4:
16610 case PROCESSOR_ATHLON:
16612 case PROCESSOR_AMDFAM10:
16613 case PROCESSOR_NOCONA:
16614 case PROCESSOR_GENERIC32:
16615 case PROCESSOR_GENERIC64:
16618 case PROCESSOR_CORE2:
16626 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16627 by DEP_INSN and nothing set by DEP_INSN. */
16630 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16634 /* Simplify the test for uninteresting insns. */
16635 if (insn_type != TYPE_SETCC
16636 && insn_type != TYPE_ICMOV
16637 && insn_type != TYPE_FCMOV
16638 && insn_type != TYPE_IBR)
16641 if ((set = single_set (dep_insn)) != 0)
16643 set = SET_DEST (set);
16646 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16647 && XVECLEN (PATTERN (dep_insn), 0) == 2
16648 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16649 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16651 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16652 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16657 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16660 /* This test is true if the dependent insn reads the flags but
16661 not any other potentially set register. */
16662 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16665 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16671 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16672 address with operands set by DEP_INSN. */
16675 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16679 if (insn_type == TYPE_LEA
16682 addr = PATTERN (insn);
16684 if (GET_CODE (addr) == PARALLEL)
16685 addr = XVECEXP (addr, 0, 0);
16687 gcc_assert (GET_CODE (addr) == SET);
16689 addr = SET_SRC (addr);
16694 extract_insn_cached (insn);
16695 for (i = recog_data.n_operands - 1; i >= 0; --i)
16696 if (MEM_P (recog_data.operand[i]))
16698 addr = XEXP (recog_data.operand[i], 0);
16705 return modified_in_p (addr, dep_insn);
16709 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16711 enum attr_type insn_type, dep_insn_type;
16712 enum attr_memory memory;
16714 int dep_insn_code_number;
16716 /* Anti and output dependencies have zero cost on all CPUs. */
16717 if (REG_NOTE_KIND (link) != 0)
16720 dep_insn_code_number = recog_memoized (dep_insn);
16722 /* If we can't recognize the insns, we can't really do anything. */
16723 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16726 insn_type = get_attr_type (insn);
16727 dep_insn_type = get_attr_type (dep_insn);
16731 case PROCESSOR_PENTIUM:
16732 /* Address Generation Interlock adds a cycle of latency. */
16733 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16736 /* ??? Compares pair with jump/setcc. */
16737 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16740 /* Floating point stores require value to be ready one cycle earlier. */
16741 if (insn_type == TYPE_FMOV
16742 && get_attr_memory (insn) == MEMORY_STORE
16743 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16747 case PROCESSOR_PENTIUMPRO:
16748 memory = get_attr_memory (insn);
16750 /* INT->FP conversion is expensive. */
16751 if (get_attr_fp_int_src (dep_insn))
16754 /* There is one cycle extra latency between an FP op and a store. */
16755 if (insn_type == TYPE_FMOV
16756 && (set = single_set (dep_insn)) != NULL_RTX
16757 && (set2 = single_set (insn)) != NULL_RTX
16758 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16759 && MEM_P (SET_DEST (set2)))
16762 /* Show ability of reorder buffer to hide latency of load by executing
16763 in parallel with previous instruction in case
16764 previous instruction is not needed to compute the address. */
16765 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16766 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16768 /* Claim moves to take one cycle, as core can issue one load
16769 at time and the next load can start cycle later. */
16770 if (dep_insn_type == TYPE_IMOV
16771 || dep_insn_type == TYPE_FMOV)
16779 memory = get_attr_memory (insn);
16781 /* The esp dependency is resolved before the instruction is really
16783 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16784 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16787 /* INT->FP conversion is expensive. */
16788 if (get_attr_fp_int_src (dep_insn))
16791 /* Show ability of reorder buffer to hide latency of load by executing
16792 in parallel with previous instruction in case
16793 previous instruction is not needed to compute the address. */
16794 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16795 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16797 /* Claim moves to take one cycle, as core can issue one load
16798 at time and the next load can start cycle later. */
16799 if (dep_insn_type == TYPE_IMOV
16800 || dep_insn_type == TYPE_FMOV)
16809 case PROCESSOR_ATHLON:
16811 case PROCESSOR_AMDFAM10:
16812 case PROCESSOR_GENERIC32:
16813 case PROCESSOR_GENERIC64:
16814 memory = get_attr_memory (insn);
16816 /* Show ability of reorder buffer to hide latency of load by executing
16817 in parallel with previous instruction in case
16818 previous instruction is not needed to compute the address. */
16819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16820 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16822 enum attr_unit unit = get_attr_unit (insn);
16825 /* Because of the difference between the length of integer and
16826 floating unit pipeline preparation stages, the memory operands
16827 for floating point are cheaper.
16829 ??? For Athlon it the difference is most probably 2. */
16830 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16833 loadcost = TARGET_ATHLON ? 2 : 0;
16835 if (cost >= loadcost)
16848 /* How many alternative schedules to try. This should be as wide as the
16849 scheduling freedom in the DFA, but no wider. Making this value too
16850 large results extra work for the scheduler. */
16853 ia32_multipass_dfa_lookahead (void)
16857 case PROCESSOR_PENTIUM:
16860 case PROCESSOR_PENTIUMPRO:
16870 /* Compute the alignment given to a constant that is being placed in memory.
16871 EXP is the constant and ALIGN is the alignment that the object would
16873 The value of this function is used instead of that alignment to align
16877 ix86_constant_alignment (tree exp, int align)
16879 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
16880 || TREE_CODE (exp) == INTEGER_CST)
16882 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16884 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16887 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16888 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16889 return BITS_PER_WORD;
16894 /* Compute the alignment for a static variable.
16895 TYPE is the data type, and ALIGN is the alignment that
16896 the object would ordinarily have. The value of this function is used
16897 instead of that alignment to align the object. */
16900 ix86_data_alignment (tree type, int align)
16902 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16904 if (AGGREGATE_TYPE_P (type)
16905 && TYPE_SIZE (type)
16906 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16907 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16908 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16909 && align < max_align)
16912 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16913 to 16byte boundary. */
16916 if (AGGREGATE_TYPE_P (type)
16917 && TYPE_SIZE (type)
16918 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16919 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16920 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16924 if (TREE_CODE (type) == ARRAY_TYPE)
16926 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16928 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16931 else if (TREE_CODE (type) == COMPLEX_TYPE)
16934 if (TYPE_MODE (type) == DCmode && align < 64)
16936 if (TYPE_MODE (type) == XCmode && align < 128)
16939 else if ((TREE_CODE (type) == RECORD_TYPE
16940 || TREE_CODE (type) == UNION_TYPE
16941 || TREE_CODE (type) == QUAL_UNION_TYPE)
16942 && TYPE_FIELDS (type))
16944 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16946 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16949 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16950 || TREE_CODE (type) == INTEGER_TYPE)
16952 if (TYPE_MODE (type) == DFmode && align < 64)
16954 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16961 /* Compute the alignment for a local variable.
16962 TYPE is the data type, and ALIGN is the alignment that
16963 the object would ordinarily have. The value of this macro is used
16964 instead of that alignment to align the object. */
16967 ix86_local_alignment (tree type, int align)
16969 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16970 to 16byte boundary. */
16973 if (AGGREGATE_TYPE_P (type)
16974 && TYPE_SIZE (type)
16975 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16976 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16977 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16980 if (TREE_CODE (type) == ARRAY_TYPE)
16982 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16984 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16987 else if (TREE_CODE (type) == COMPLEX_TYPE)
16989 if (TYPE_MODE (type) == DCmode && align < 64)
16991 if (TYPE_MODE (type) == XCmode && align < 128)
16994 else if ((TREE_CODE (type) == RECORD_TYPE
16995 || TREE_CODE (type) == UNION_TYPE
16996 || TREE_CODE (type) == QUAL_UNION_TYPE)
16997 && TYPE_FIELDS (type))
16999 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17001 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17004 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17005 || TREE_CODE (type) == INTEGER_TYPE)
17008 if (TYPE_MODE (type) == DFmode && align < 64)
17010 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17016 /* Emit RTL insns to initialize the variable parts of a trampoline.
17017 FNADDR is an RTX for the address of the function's pure code.
17018 CXT is an RTX for the static chain value for the function. */
17020 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17024 /* Compute offset from the end of the jmp to the target function. */
17025 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17026 plus_constant (tramp, 10),
17027 NULL_RTX, 1, OPTAB_DIRECT);
17028 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17029 gen_int_mode (0xb9, QImode));
17030 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17031 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17032 gen_int_mode (0xe9, QImode));
17033 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17038 /* Try to load address using shorter movl instead of movabs.
17039 We may want to support movq for kernel mode, but kernel does not use
17040 trampolines at the moment. */
17041 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17043 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17044 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17045 gen_int_mode (0xbb41, HImode));
17046 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17047 gen_lowpart (SImode, fnaddr));
17052 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17053 gen_int_mode (0xbb49, HImode));
17054 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17058 /* Load static chain using movabs to r10. */
17059 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17060 gen_int_mode (0xba49, HImode));
17061 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17064 /* Jump to the r11 */
17065 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17066 gen_int_mode (0xff49, HImode));
17067 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17068 gen_int_mode (0xe3, QImode));
17070 gcc_assert (offset <= TRAMPOLINE_SIZE);
17073 #ifdef ENABLE_EXECUTE_STACK
17074 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17075 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17079 /* Codes for all the SSE/MMX builtins. */
17082 IX86_BUILTIN_ADDPS,
17083 IX86_BUILTIN_ADDSS,
17084 IX86_BUILTIN_DIVPS,
17085 IX86_BUILTIN_DIVSS,
17086 IX86_BUILTIN_MULPS,
17087 IX86_BUILTIN_MULSS,
17088 IX86_BUILTIN_SUBPS,
17089 IX86_BUILTIN_SUBSS,
17091 IX86_BUILTIN_CMPEQPS,
17092 IX86_BUILTIN_CMPLTPS,
17093 IX86_BUILTIN_CMPLEPS,
17094 IX86_BUILTIN_CMPGTPS,
17095 IX86_BUILTIN_CMPGEPS,
17096 IX86_BUILTIN_CMPNEQPS,
17097 IX86_BUILTIN_CMPNLTPS,
17098 IX86_BUILTIN_CMPNLEPS,
17099 IX86_BUILTIN_CMPNGTPS,
17100 IX86_BUILTIN_CMPNGEPS,
17101 IX86_BUILTIN_CMPORDPS,
17102 IX86_BUILTIN_CMPUNORDPS,
17103 IX86_BUILTIN_CMPEQSS,
17104 IX86_BUILTIN_CMPLTSS,
17105 IX86_BUILTIN_CMPLESS,
17106 IX86_BUILTIN_CMPNEQSS,
17107 IX86_BUILTIN_CMPNLTSS,
17108 IX86_BUILTIN_CMPNLESS,
17109 IX86_BUILTIN_CMPNGTSS,
17110 IX86_BUILTIN_CMPNGESS,
17111 IX86_BUILTIN_CMPORDSS,
17112 IX86_BUILTIN_CMPUNORDSS,
17114 IX86_BUILTIN_COMIEQSS,
17115 IX86_BUILTIN_COMILTSS,
17116 IX86_BUILTIN_COMILESS,
17117 IX86_BUILTIN_COMIGTSS,
17118 IX86_BUILTIN_COMIGESS,
17119 IX86_BUILTIN_COMINEQSS,
17120 IX86_BUILTIN_UCOMIEQSS,
17121 IX86_BUILTIN_UCOMILTSS,
17122 IX86_BUILTIN_UCOMILESS,
17123 IX86_BUILTIN_UCOMIGTSS,
17124 IX86_BUILTIN_UCOMIGESS,
17125 IX86_BUILTIN_UCOMINEQSS,
17127 IX86_BUILTIN_CVTPI2PS,
17128 IX86_BUILTIN_CVTPS2PI,
17129 IX86_BUILTIN_CVTSI2SS,
17130 IX86_BUILTIN_CVTSI642SS,
17131 IX86_BUILTIN_CVTSS2SI,
17132 IX86_BUILTIN_CVTSS2SI64,
17133 IX86_BUILTIN_CVTTPS2PI,
17134 IX86_BUILTIN_CVTTSS2SI,
17135 IX86_BUILTIN_CVTTSS2SI64,
17137 IX86_BUILTIN_MAXPS,
17138 IX86_BUILTIN_MAXSS,
17139 IX86_BUILTIN_MINPS,
17140 IX86_BUILTIN_MINSS,
17142 IX86_BUILTIN_LOADUPS,
17143 IX86_BUILTIN_STOREUPS,
17144 IX86_BUILTIN_MOVSS,
17146 IX86_BUILTIN_MOVHLPS,
17147 IX86_BUILTIN_MOVLHPS,
17148 IX86_BUILTIN_LOADHPS,
17149 IX86_BUILTIN_LOADLPS,
17150 IX86_BUILTIN_STOREHPS,
17151 IX86_BUILTIN_STORELPS,
17153 IX86_BUILTIN_MASKMOVQ,
17154 IX86_BUILTIN_MOVMSKPS,
17155 IX86_BUILTIN_PMOVMSKB,
17157 IX86_BUILTIN_MOVNTPS,
17158 IX86_BUILTIN_MOVNTQ,
17160 IX86_BUILTIN_LOADDQU,
17161 IX86_BUILTIN_STOREDQU,
17163 IX86_BUILTIN_PACKSSWB,
17164 IX86_BUILTIN_PACKSSDW,
17165 IX86_BUILTIN_PACKUSWB,
17167 IX86_BUILTIN_PADDB,
17168 IX86_BUILTIN_PADDW,
17169 IX86_BUILTIN_PADDD,
17170 IX86_BUILTIN_PADDQ,
17171 IX86_BUILTIN_PADDSB,
17172 IX86_BUILTIN_PADDSW,
17173 IX86_BUILTIN_PADDUSB,
17174 IX86_BUILTIN_PADDUSW,
17175 IX86_BUILTIN_PSUBB,
17176 IX86_BUILTIN_PSUBW,
17177 IX86_BUILTIN_PSUBD,
17178 IX86_BUILTIN_PSUBQ,
17179 IX86_BUILTIN_PSUBSB,
17180 IX86_BUILTIN_PSUBSW,
17181 IX86_BUILTIN_PSUBUSB,
17182 IX86_BUILTIN_PSUBUSW,
17185 IX86_BUILTIN_PANDN,
17189 IX86_BUILTIN_PAVGB,
17190 IX86_BUILTIN_PAVGW,
17192 IX86_BUILTIN_PCMPEQB,
17193 IX86_BUILTIN_PCMPEQW,
17194 IX86_BUILTIN_PCMPEQD,
17195 IX86_BUILTIN_PCMPGTB,
17196 IX86_BUILTIN_PCMPGTW,
17197 IX86_BUILTIN_PCMPGTD,
17199 IX86_BUILTIN_PMADDWD,
17201 IX86_BUILTIN_PMAXSW,
17202 IX86_BUILTIN_PMAXUB,
17203 IX86_BUILTIN_PMINSW,
17204 IX86_BUILTIN_PMINUB,
17206 IX86_BUILTIN_PMULHUW,
17207 IX86_BUILTIN_PMULHW,
17208 IX86_BUILTIN_PMULLW,
17210 IX86_BUILTIN_PSADBW,
17211 IX86_BUILTIN_PSHUFW,
17213 IX86_BUILTIN_PSLLW,
17214 IX86_BUILTIN_PSLLD,
17215 IX86_BUILTIN_PSLLQ,
17216 IX86_BUILTIN_PSRAW,
17217 IX86_BUILTIN_PSRAD,
17218 IX86_BUILTIN_PSRLW,
17219 IX86_BUILTIN_PSRLD,
17220 IX86_BUILTIN_PSRLQ,
17221 IX86_BUILTIN_PSLLWI,
17222 IX86_BUILTIN_PSLLDI,
17223 IX86_BUILTIN_PSLLQI,
17224 IX86_BUILTIN_PSRAWI,
17225 IX86_BUILTIN_PSRADI,
17226 IX86_BUILTIN_PSRLWI,
17227 IX86_BUILTIN_PSRLDI,
17228 IX86_BUILTIN_PSRLQI,
17230 IX86_BUILTIN_PUNPCKHBW,
17231 IX86_BUILTIN_PUNPCKHWD,
17232 IX86_BUILTIN_PUNPCKHDQ,
17233 IX86_BUILTIN_PUNPCKLBW,
17234 IX86_BUILTIN_PUNPCKLWD,
17235 IX86_BUILTIN_PUNPCKLDQ,
17237 IX86_BUILTIN_SHUFPS,
17239 IX86_BUILTIN_RCPPS,
17240 IX86_BUILTIN_RCPSS,
17241 IX86_BUILTIN_RSQRTPS,
17242 IX86_BUILTIN_RSQRTPS_NR,
17243 IX86_BUILTIN_RSQRTSS,
17244 IX86_BUILTIN_RSQRTF,
17245 IX86_BUILTIN_SQRTPS,
17246 IX86_BUILTIN_SQRTPS_NR,
17247 IX86_BUILTIN_SQRTSS,
17249 IX86_BUILTIN_UNPCKHPS,
17250 IX86_BUILTIN_UNPCKLPS,
17252 IX86_BUILTIN_ANDPS,
17253 IX86_BUILTIN_ANDNPS,
17255 IX86_BUILTIN_XORPS,
17258 IX86_BUILTIN_LDMXCSR,
17259 IX86_BUILTIN_STMXCSR,
17260 IX86_BUILTIN_SFENCE,
17262 /* 3DNow! Original */
17263 IX86_BUILTIN_FEMMS,
17264 IX86_BUILTIN_PAVGUSB,
17265 IX86_BUILTIN_PF2ID,
17266 IX86_BUILTIN_PFACC,
17267 IX86_BUILTIN_PFADD,
17268 IX86_BUILTIN_PFCMPEQ,
17269 IX86_BUILTIN_PFCMPGE,
17270 IX86_BUILTIN_PFCMPGT,
17271 IX86_BUILTIN_PFMAX,
17272 IX86_BUILTIN_PFMIN,
17273 IX86_BUILTIN_PFMUL,
17274 IX86_BUILTIN_PFRCP,
17275 IX86_BUILTIN_PFRCPIT1,
17276 IX86_BUILTIN_PFRCPIT2,
17277 IX86_BUILTIN_PFRSQIT1,
17278 IX86_BUILTIN_PFRSQRT,
17279 IX86_BUILTIN_PFSUB,
17280 IX86_BUILTIN_PFSUBR,
17281 IX86_BUILTIN_PI2FD,
17282 IX86_BUILTIN_PMULHRW,
17284 /* 3DNow! Athlon Extensions */
17285 IX86_BUILTIN_PF2IW,
17286 IX86_BUILTIN_PFNACC,
17287 IX86_BUILTIN_PFPNACC,
17288 IX86_BUILTIN_PI2FW,
17289 IX86_BUILTIN_PSWAPDSI,
17290 IX86_BUILTIN_PSWAPDSF,
17293 IX86_BUILTIN_ADDPD,
17294 IX86_BUILTIN_ADDSD,
17295 IX86_BUILTIN_DIVPD,
17296 IX86_BUILTIN_DIVSD,
17297 IX86_BUILTIN_MULPD,
17298 IX86_BUILTIN_MULSD,
17299 IX86_BUILTIN_SUBPD,
17300 IX86_BUILTIN_SUBSD,
17302 IX86_BUILTIN_CMPEQPD,
17303 IX86_BUILTIN_CMPLTPD,
17304 IX86_BUILTIN_CMPLEPD,
17305 IX86_BUILTIN_CMPGTPD,
17306 IX86_BUILTIN_CMPGEPD,
17307 IX86_BUILTIN_CMPNEQPD,
17308 IX86_BUILTIN_CMPNLTPD,
17309 IX86_BUILTIN_CMPNLEPD,
17310 IX86_BUILTIN_CMPNGTPD,
17311 IX86_BUILTIN_CMPNGEPD,
17312 IX86_BUILTIN_CMPORDPD,
17313 IX86_BUILTIN_CMPUNORDPD,
17314 IX86_BUILTIN_CMPEQSD,
17315 IX86_BUILTIN_CMPLTSD,
17316 IX86_BUILTIN_CMPLESD,
17317 IX86_BUILTIN_CMPNEQSD,
17318 IX86_BUILTIN_CMPNLTSD,
17319 IX86_BUILTIN_CMPNLESD,
17320 IX86_BUILTIN_CMPORDSD,
17321 IX86_BUILTIN_CMPUNORDSD,
17323 IX86_BUILTIN_COMIEQSD,
17324 IX86_BUILTIN_COMILTSD,
17325 IX86_BUILTIN_COMILESD,
17326 IX86_BUILTIN_COMIGTSD,
17327 IX86_BUILTIN_COMIGESD,
17328 IX86_BUILTIN_COMINEQSD,
17329 IX86_BUILTIN_UCOMIEQSD,
17330 IX86_BUILTIN_UCOMILTSD,
17331 IX86_BUILTIN_UCOMILESD,
17332 IX86_BUILTIN_UCOMIGTSD,
17333 IX86_BUILTIN_UCOMIGESD,
17334 IX86_BUILTIN_UCOMINEQSD,
17336 IX86_BUILTIN_MAXPD,
17337 IX86_BUILTIN_MAXSD,
17338 IX86_BUILTIN_MINPD,
17339 IX86_BUILTIN_MINSD,
17341 IX86_BUILTIN_ANDPD,
17342 IX86_BUILTIN_ANDNPD,
17344 IX86_BUILTIN_XORPD,
17346 IX86_BUILTIN_SQRTPD,
17347 IX86_BUILTIN_SQRTSD,
17349 IX86_BUILTIN_UNPCKHPD,
17350 IX86_BUILTIN_UNPCKLPD,
17352 IX86_BUILTIN_SHUFPD,
17354 IX86_BUILTIN_LOADUPD,
17355 IX86_BUILTIN_STOREUPD,
17356 IX86_BUILTIN_MOVSD,
17358 IX86_BUILTIN_LOADHPD,
17359 IX86_BUILTIN_LOADLPD,
17361 IX86_BUILTIN_CVTDQ2PD,
17362 IX86_BUILTIN_CVTDQ2PS,
17364 IX86_BUILTIN_CVTPD2DQ,
17365 IX86_BUILTIN_CVTPD2PI,
17366 IX86_BUILTIN_CVTPD2PS,
17367 IX86_BUILTIN_CVTTPD2DQ,
17368 IX86_BUILTIN_CVTTPD2PI,
17370 IX86_BUILTIN_CVTPI2PD,
17371 IX86_BUILTIN_CVTSI2SD,
17372 IX86_BUILTIN_CVTSI642SD,
17374 IX86_BUILTIN_CVTSD2SI,
17375 IX86_BUILTIN_CVTSD2SI64,
17376 IX86_BUILTIN_CVTSD2SS,
17377 IX86_BUILTIN_CVTSS2SD,
17378 IX86_BUILTIN_CVTTSD2SI,
17379 IX86_BUILTIN_CVTTSD2SI64,
17381 IX86_BUILTIN_CVTPS2DQ,
17382 IX86_BUILTIN_CVTPS2PD,
17383 IX86_BUILTIN_CVTTPS2DQ,
17385 IX86_BUILTIN_MOVNTI,
17386 IX86_BUILTIN_MOVNTPD,
17387 IX86_BUILTIN_MOVNTDQ,
17390 IX86_BUILTIN_MASKMOVDQU,
17391 IX86_BUILTIN_MOVMSKPD,
17392 IX86_BUILTIN_PMOVMSKB128,
17394 IX86_BUILTIN_PACKSSWB128,
17395 IX86_BUILTIN_PACKSSDW128,
17396 IX86_BUILTIN_PACKUSWB128,
17398 IX86_BUILTIN_PADDB128,
17399 IX86_BUILTIN_PADDW128,
17400 IX86_BUILTIN_PADDD128,
17401 IX86_BUILTIN_PADDQ128,
17402 IX86_BUILTIN_PADDSB128,
17403 IX86_BUILTIN_PADDSW128,
17404 IX86_BUILTIN_PADDUSB128,
17405 IX86_BUILTIN_PADDUSW128,
17406 IX86_BUILTIN_PSUBB128,
17407 IX86_BUILTIN_PSUBW128,
17408 IX86_BUILTIN_PSUBD128,
17409 IX86_BUILTIN_PSUBQ128,
17410 IX86_BUILTIN_PSUBSB128,
17411 IX86_BUILTIN_PSUBSW128,
17412 IX86_BUILTIN_PSUBUSB128,
17413 IX86_BUILTIN_PSUBUSW128,
17415 IX86_BUILTIN_PAND128,
17416 IX86_BUILTIN_PANDN128,
17417 IX86_BUILTIN_POR128,
17418 IX86_BUILTIN_PXOR128,
17420 IX86_BUILTIN_PAVGB128,
17421 IX86_BUILTIN_PAVGW128,
17423 IX86_BUILTIN_PCMPEQB128,
17424 IX86_BUILTIN_PCMPEQW128,
17425 IX86_BUILTIN_PCMPEQD128,
17426 IX86_BUILTIN_PCMPGTB128,
17427 IX86_BUILTIN_PCMPGTW128,
17428 IX86_BUILTIN_PCMPGTD128,
17430 IX86_BUILTIN_PMADDWD128,
17432 IX86_BUILTIN_PMAXSW128,
17433 IX86_BUILTIN_PMAXUB128,
17434 IX86_BUILTIN_PMINSW128,
17435 IX86_BUILTIN_PMINUB128,
17437 IX86_BUILTIN_PMULUDQ,
17438 IX86_BUILTIN_PMULUDQ128,
17439 IX86_BUILTIN_PMULHUW128,
17440 IX86_BUILTIN_PMULHW128,
17441 IX86_BUILTIN_PMULLW128,
17443 IX86_BUILTIN_PSADBW128,
17444 IX86_BUILTIN_PSHUFHW,
17445 IX86_BUILTIN_PSHUFLW,
17446 IX86_BUILTIN_PSHUFD,
17448 IX86_BUILTIN_PSLLDQI128,
17449 IX86_BUILTIN_PSLLWI128,
17450 IX86_BUILTIN_PSLLDI128,
17451 IX86_BUILTIN_PSLLQI128,
17452 IX86_BUILTIN_PSRAWI128,
17453 IX86_BUILTIN_PSRADI128,
17454 IX86_BUILTIN_PSRLDQI128,
17455 IX86_BUILTIN_PSRLWI128,
17456 IX86_BUILTIN_PSRLDI128,
17457 IX86_BUILTIN_PSRLQI128,
17459 IX86_BUILTIN_PSLLDQ128,
17460 IX86_BUILTIN_PSLLW128,
17461 IX86_BUILTIN_PSLLD128,
17462 IX86_BUILTIN_PSLLQ128,
17463 IX86_BUILTIN_PSRAW128,
17464 IX86_BUILTIN_PSRAD128,
17465 IX86_BUILTIN_PSRLW128,
17466 IX86_BUILTIN_PSRLD128,
17467 IX86_BUILTIN_PSRLQ128,
17469 IX86_BUILTIN_PUNPCKHBW128,
17470 IX86_BUILTIN_PUNPCKHWD128,
17471 IX86_BUILTIN_PUNPCKHDQ128,
17472 IX86_BUILTIN_PUNPCKHQDQ128,
17473 IX86_BUILTIN_PUNPCKLBW128,
17474 IX86_BUILTIN_PUNPCKLWD128,
17475 IX86_BUILTIN_PUNPCKLDQ128,
17476 IX86_BUILTIN_PUNPCKLQDQ128,
17478 IX86_BUILTIN_CLFLUSH,
17479 IX86_BUILTIN_MFENCE,
17480 IX86_BUILTIN_LFENCE,
17482 /* Prescott New Instructions. */
17483 IX86_BUILTIN_ADDSUBPS,
17484 IX86_BUILTIN_HADDPS,
17485 IX86_BUILTIN_HSUBPS,
17486 IX86_BUILTIN_MOVSHDUP,
17487 IX86_BUILTIN_MOVSLDUP,
17488 IX86_BUILTIN_ADDSUBPD,
17489 IX86_BUILTIN_HADDPD,
17490 IX86_BUILTIN_HSUBPD,
17491 IX86_BUILTIN_LDDQU,
17493 IX86_BUILTIN_MONITOR,
17494 IX86_BUILTIN_MWAIT,
17497 IX86_BUILTIN_PHADDW,
17498 IX86_BUILTIN_PHADDD,
17499 IX86_BUILTIN_PHADDSW,
17500 IX86_BUILTIN_PHSUBW,
17501 IX86_BUILTIN_PHSUBD,
17502 IX86_BUILTIN_PHSUBSW,
17503 IX86_BUILTIN_PMADDUBSW,
17504 IX86_BUILTIN_PMULHRSW,
17505 IX86_BUILTIN_PSHUFB,
17506 IX86_BUILTIN_PSIGNB,
17507 IX86_BUILTIN_PSIGNW,
17508 IX86_BUILTIN_PSIGND,
17509 IX86_BUILTIN_PALIGNR,
17510 IX86_BUILTIN_PABSB,
17511 IX86_BUILTIN_PABSW,
17512 IX86_BUILTIN_PABSD,
17514 IX86_BUILTIN_PHADDW128,
17515 IX86_BUILTIN_PHADDD128,
17516 IX86_BUILTIN_PHADDSW128,
17517 IX86_BUILTIN_PHSUBW128,
17518 IX86_BUILTIN_PHSUBD128,
17519 IX86_BUILTIN_PHSUBSW128,
17520 IX86_BUILTIN_PMADDUBSW128,
17521 IX86_BUILTIN_PMULHRSW128,
17522 IX86_BUILTIN_PSHUFB128,
17523 IX86_BUILTIN_PSIGNB128,
17524 IX86_BUILTIN_PSIGNW128,
17525 IX86_BUILTIN_PSIGND128,
17526 IX86_BUILTIN_PALIGNR128,
17527 IX86_BUILTIN_PABSB128,
17528 IX86_BUILTIN_PABSW128,
17529 IX86_BUILTIN_PABSD128,
17531 /* AMDFAM10 - SSE4A New Instructions. */
17532 IX86_BUILTIN_MOVNTSD,
17533 IX86_BUILTIN_MOVNTSS,
17534 IX86_BUILTIN_EXTRQI,
17535 IX86_BUILTIN_EXTRQ,
17536 IX86_BUILTIN_INSERTQI,
17537 IX86_BUILTIN_INSERTQ,
17540 IX86_BUILTIN_BLENDPD,
17541 IX86_BUILTIN_BLENDPS,
17542 IX86_BUILTIN_BLENDVPD,
17543 IX86_BUILTIN_BLENDVPS,
17544 IX86_BUILTIN_PBLENDVB128,
17545 IX86_BUILTIN_PBLENDW128,
17550 IX86_BUILTIN_INSERTPS128,
17552 IX86_BUILTIN_MOVNTDQA,
17553 IX86_BUILTIN_MPSADBW128,
17554 IX86_BUILTIN_PACKUSDW128,
17555 IX86_BUILTIN_PCMPEQQ,
17556 IX86_BUILTIN_PHMINPOSUW128,
17558 IX86_BUILTIN_PMAXSB128,
17559 IX86_BUILTIN_PMAXSD128,
17560 IX86_BUILTIN_PMAXUD128,
17561 IX86_BUILTIN_PMAXUW128,
17563 IX86_BUILTIN_PMINSB128,
17564 IX86_BUILTIN_PMINSD128,
17565 IX86_BUILTIN_PMINUD128,
17566 IX86_BUILTIN_PMINUW128,
17568 IX86_BUILTIN_PMOVSXBW128,
17569 IX86_BUILTIN_PMOVSXBD128,
17570 IX86_BUILTIN_PMOVSXBQ128,
17571 IX86_BUILTIN_PMOVSXWD128,
17572 IX86_BUILTIN_PMOVSXWQ128,
17573 IX86_BUILTIN_PMOVSXDQ128,
17575 IX86_BUILTIN_PMOVZXBW128,
17576 IX86_BUILTIN_PMOVZXBD128,
17577 IX86_BUILTIN_PMOVZXBQ128,
17578 IX86_BUILTIN_PMOVZXWD128,
17579 IX86_BUILTIN_PMOVZXWQ128,
17580 IX86_BUILTIN_PMOVZXDQ128,
17582 IX86_BUILTIN_PMULDQ128,
17583 IX86_BUILTIN_PMULLD128,
17585 IX86_BUILTIN_ROUNDPD,
17586 IX86_BUILTIN_ROUNDPS,
17587 IX86_BUILTIN_ROUNDSD,
17588 IX86_BUILTIN_ROUNDSS,
17590 IX86_BUILTIN_PTESTZ,
17591 IX86_BUILTIN_PTESTC,
17592 IX86_BUILTIN_PTESTNZC,
17594 IX86_BUILTIN_VEC_INIT_V2SI,
17595 IX86_BUILTIN_VEC_INIT_V4HI,
17596 IX86_BUILTIN_VEC_INIT_V8QI,
17597 IX86_BUILTIN_VEC_EXT_V2DF,
17598 IX86_BUILTIN_VEC_EXT_V2DI,
17599 IX86_BUILTIN_VEC_EXT_V4SF,
17600 IX86_BUILTIN_VEC_EXT_V4SI,
17601 IX86_BUILTIN_VEC_EXT_V8HI,
17602 IX86_BUILTIN_VEC_EXT_V2SI,
17603 IX86_BUILTIN_VEC_EXT_V4HI,
17604 IX86_BUILTIN_VEC_EXT_V16QI,
17605 IX86_BUILTIN_VEC_SET_V2DI,
17606 IX86_BUILTIN_VEC_SET_V4SF,
17607 IX86_BUILTIN_VEC_SET_V4SI,
17608 IX86_BUILTIN_VEC_SET_V8HI,
17609 IX86_BUILTIN_VEC_SET_V4HI,
17610 IX86_BUILTIN_VEC_SET_V16QI,
17612 IX86_BUILTIN_VEC_PACK_SFIX,
17615 IX86_BUILTIN_CRC32QI,
17616 IX86_BUILTIN_CRC32HI,
17617 IX86_BUILTIN_CRC32SI,
17618 IX86_BUILTIN_CRC32DI,
17620 IX86_BUILTIN_PCMPESTRI128,
17621 IX86_BUILTIN_PCMPESTRM128,
17622 IX86_BUILTIN_PCMPESTRA128,
17623 IX86_BUILTIN_PCMPESTRC128,
17624 IX86_BUILTIN_PCMPESTRO128,
17625 IX86_BUILTIN_PCMPESTRS128,
17626 IX86_BUILTIN_PCMPESTRZ128,
17627 IX86_BUILTIN_PCMPISTRI128,
17628 IX86_BUILTIN_PCMPISTRM128,
17629 IX86_BUILTIN_PCMPISTRA128,
17630 IX86_BUILTIN_PCMPISTRC128,
17631 IX86_BUILTIN_PCMPISTRO128,
17632 IX86_BUILTIN_PCMPISTRS128,
17633 IX86_BUILTIN_PCMPISTRZ128,
17635 IX86_BUILTIN_PCMPGTQ,
17637 /* AES instructions */
17638 IX86_BUILTIN_AESENC128,
17639 IX86_BUILTIN_AESENCLAST128,
17640 IX86_BUILTIN_AESDEC128,
17641 IX86_BUILTIN_AESDECLAST128,
17642 IX86_BUILTIN_AESIMC128,
17643 IX86_BUILTIN_AESKEYGENASSIST128,
17645 /* PCLMUL instruction */
17646 IX86_BUILTIN_PCLMULQDQ128,
17648 /* TFmode support builtins. */
17650 IX86_BUILTIN_FABSQ,
17651 IX86_BUILTIN_COPYSIGNQ,
17653 /* SSE5 instructions */
17654 IX86_BUILTIN_FMADDSS,
17655 IX86_BUILTIN_FMADDSD,
17656 IX86_BUILTIN_FMADDPS,
17657 IX86_BUILTIN_FMADDPD,
17658 IX86_BUILTIN_FMSUBSS,
17659 IX86_BUILTIN_FMSUBSD,
17660 IX86_BUILTIN_FMSUBPS,
17661 IX86_BUILTIN_FMSUBPD,
17662 IX86_BUILTIN_FNMADDSS,
17663 IX86_BUILTIN_FNMADDSD,
17664 IX86_BUILTIN_FNMADDPS,
17665 IX86_BUILTIN_FNMADDPD,
17666 IX86_BUILTIN_FNMSUBSS,
17667 IX86_BUILTIN_FNMSUBSD,
17668 IX86_BUILTIN_FNMSUBPS,
17669 IX86_BUILTIN_FNMSUBPD,
17670 IX86_BUILTIN_PCMOV_V2DI,
17671 IX86_BUILTIN_PCMOV_V4SI,
17672 IX86_BUILTIN_PCMOV_V8HI,
17673 IX86_BUILTIN_PCMOV_V16QI,
17674 IX86_BUILTIN_PCMOV_V4SF,
17675 IX86_BUILTIN_PCMOV_V2DF,
17676 IX86_BUILTIN_PPERM,
17677 IX86_BUILTIN_PERMPS,
17678 IX86_BUILTIN_PERMPD,
17679 IX86_BUILTIN_PMACSSWW,
17680 IX86_BUILTIN_PMACSWW,
17681 IX86_BUILTIN_PMACSSWD,
17682 IX86_BUILTIN_PMACSWD,
17683 IX86_BUILTIN_PMACSSDD,
17684 IX86_BUILTIN_PMACSDD,
17685 IX86_BUILTIN_PMACSSDQL,
17686 IX86_BUILTIN_PMACSSDQH,
17687 IX86_BUILTIN_PMACSDQL,
17688 IX86_BUILTIN_PMACSDQH,
17689 IX86_BUILTIN_PMADCSSWD,
17690 IX86_BUILTIN_PMADCSWD,
17691 IX86_BUILTIN_PHADDBW,
17692 IX86_BUILTIN_PHADDBD,
17693 IX86_BUILTIN_PHADDBQ,
17694 IX86_BUILTIN_PHADDWD,
17695 IX86_BUILTIN_PHADDWQ,
17696 IX86_BUILTIN_PHADDDQ,
17697 IX86_BUILTIN_PHADDUBW,
17698 IX86_BUILTIN_PHADDUBD,
17699 IX86_BUILTIN_PHADDUBQ,
17700 IX86_BUILTIN_PHADDUWD,
17701 IX86_BUILTIN_PHADDUWQ,
17702 IX86_BUILTIN_PHADDUDQ,
17703 IX86_BUILTIN_PHSUBBW,
17704 IX86_BUILTIN_PHSUBWD,
17705 IX86_BUILTIN_PHSUBDQ,
17706 IX86_BUILTIN_PROTB,
17707 IX86_BUILTIN_PROTW,
17708 IX86_BUILTIN_PROTD,
17709 IX86_BUILTIN_PROTQ,
17710 IX86_BUILTIN_PROTB_IMM,
17711 IX86_BUILTIN_PROTW_IMM,
17712 IX86_BUILTIN_PROTD_IMM,
17713 IX86_BUILTIN_PROTQ_IMM,
17714 IX86_BUILTIN_PSHLB,
17715 IX86_BUILTIN_PSHLW,
17716 IX86_BUILTIN_PSHLD,
17717 IX86_BUILTIN_PSHLQ,
17718 IX86_BUILTIN_PSHAB,
17719 IX86_BUILTIN_PSHAW,
17720 IX86_BUILTIN_PSHAD,
17721 IX86_BUILTIN_PSHAQ,
17722 IX86_BUILTIN_FRCZSS,
17723 IX86_BUILTIN_FRCZSD,
17724 IX86_BUILTIN_FRCZPS,
17725 IX86_BUILTIN_FRCZPD,
17726 IX86_BUILTIN_CVTPH2PS,
17727 IX86_BUILTIN_CVTPS2PH,
17729 IX86_BUILTIN_COMEQSS,
17730 IX86_BUILTIN_COMNESS,
17731 IX86_BUILTIN_COMLTSS,
17732 IX86_BUILTIN_COMLESS,
17733 IX86_BUILTIN_COMGTSS,
17734 IX86_BUILTIN_COMGESS,
17735 IX86_BUILTIN_COMUEQSS,
17736 IX86_BUILTIN_COMUNESS,
17737 IX86_BUILTIN_COMULTSS,
17738 IX86_BUILTIN_COMULESS,
17739 IX86_BUILTIN_COMUGTSS,
17740 IX86_BUILTIN_COMUGESS,
17741 IX86_BUILTIN_COMORDSS,
17742 IX86_BUILTIN_COMUNORDSS,
17743 IX86_BUILTIN_COMFALSESS,
17744 IX86_BUILTIN_COMTRUESS,
17746 IX86_BUILTIN_COMEQSD,
17747 IX86_BUILTIN_COMNESD,
17748 IX86_BUILTIN_COMLTSD,
17749 IX86_BUILTIN_COMLESD,
17750 IX86_BUILTIN_COMGTSD,
17751 IX86_BUILTIN_COMGESD,
17752 IX86_BUILTIN_COMUEQSD,
17753 IX86_BUILTIN_COMUNESD,
17754 IX86_BUILTIN_COMULTSD,
17755 IX86_BUILTIN_COMULESD,
17756 IX86_BUILTIN_COMUGTSD,
17757 IX86_BUILTIN_COMUGESD,
17758 IX86_BUILTIN_COMORDSD,
17759 IX86_BUILTIN_COMUNORDSD,
17760 IX86_BUILTIN_COMFALSESD,
17761 IX86_BUILTIN_COMTRUESD,
17763 IX86_BUILTIN_COMEQPS,
17764 IX86_BUILTIN_COMNEPS,
17765 IX86_BUILTIN_COMLTPS,
17766 IX86_BUILTIN_COMLEPS,
17767 IX86_BUILTIN_COMGTPS,
17768 IX86_BUILTIN_COMGEPS,
17769 IX86_BUILTIN_COMUEQPS,
17770 IX86_BUILTIN_COMUNEPS,
17771 IX86_BUILTIN_COMULTPS,
17772 IX86_BUILTIN_COMULEPS,
17773 IX86_BUILTIN_COMUGTPS,
17774 IX86_BUILTIN_COMUGEPS,
17775 IX86_BUILTIN_COMORDPS,
17776 IX86_BUILTIN_COMUNORDPS,
17777 IX86_BUILTIN_COMFALSEPS,
17778 IX86_BUILTIN_COMTRUEPS,
17780 IX86_BUILTIN_COMEQPD,
17781 IX86_BUILTIN_COMNEPD,
17782 IX86_BUILTIN_COMLTPD,
17783 IX86_BUILTIN_COMLEPD,
17784 IX86_BUILTIN_COMGTPD,
17785 IX86_BUILTIN_COMGEPD,
17786 IX86_BUILTIN_COMUEQPD,
17787 IX86_BUILTIN_COMUNEPD,
17788 IX86_BUILTIN_COMULTPD,
17789 IX86_BUILTIN_COMULEPD,
17790 IX86_BUILTIN_COMUGTPD,
17791 IX86_BUILTIN_COMUGEPD,
17792 IX86_BUILTIN_COMORDPD,
17793 IX86_BUILTIN_COMUNORDPD,
17794 IX86_BUILTIN_COMFALSEPD,
17795 IX86_BUILTIN_COMTRUEPD,
17797 IX86_BUILTIN_PCOMEQUB,
17798 IX86_BUILTIN_PCOMNEUB,
17799 IX86_BUILTIN_PCOMLTUB,
17800 IX86_BUILTIN_PCOMLEUB,
17801 IX86_BUILTIN_PCOMGTUB,
17802 IX86_BUILTIN_PCOMGEUB,
17803 IX86_BUILTIN_PCOMFALSEUB,
17804 IX86_BUILTIN_PCOMTRUEUB,
17805 IX86_BUILTIN_PCOMEQUW,
17806 IX86_BUILTIN_PCOMNEUW,
17807 IX86_BUILTIN_PCOMLTUW,
17808 IX86_BUILTIN_PCOMLEUW,
17809 IX86_BUILTIN_PCOMGTUW,
17810 IX86_BUILTIN_PCOMGEUW,
17811 IX86_BUILTIN_PCOMFALSEUW,
17812 IX86_BUILTIN_PCOMTRUEUW,
17813 IX86_BUILTIN_PCOMEQUD,
17814 IX86_BUILTIN_PCOMNEUD,
17815 IX86_BUILTIN_PCOMLTUD,
17816 IX86_BUILTIN_PCOMLEUD,
17817 IX86_BUILTIN_PCOMGTUD,
17818 IX86_BUILTIN_PCOMGEUD,
17819 IX86_BUILTIN_PCOMFALSEUD,
17820 IX86_BUILTIN_PCOMTRUEUD,
17821 IX86_BUILTIN_PCOMEQUQ,
17822 IX86_BUILTIN_PCOMNEUQ,
17823 IX86_BUILTIN_PCOMLTUQ,
17824 IX86_BUILTIN_PCOMLEUQ,
17825 IX86_BUILTIN_PCOMGTUQ,
17826 IX86_BUILTIN_PCOMGEUQ,
17827 IX86_BUILTIN_PCOMFALSEUQ,
17828 IX86_BUILTIN_PCOMTRUEUQ,
17830 IX86_BUILTIN_PCOMEQB,
17831 IX86_BUILTIN_PCOMNEB,
17832 IX86_BUILTIN_PCOMLTB,
17833 IX86_BUILTIN_PCOMLEB,
17834 IX86_BUILTIN_PCOMGTB,
17835 IX86_BUILTIN_PCOMGEB,
17836 IX86_BUILTIN_PCOMFALSEB,
17837 IX86_BUILTIN_PCOMTRUEB,
17838 IX86_BUILTIN_PCOMEQW,
17839 IX86_BUILTIN_PCOMNEW,
17840 IX86_BUILTIN_PCOMLTW,
17841 IX86_BUILTIN_PCOMLEW,
17842 IX86_BUILTIN_PCOMGTW,
17843 IX86_BUILTIN_PCOMGEW,
17844 IX86_BUILTIN_PCOMFALSEW,
17845 IX86_BUILTIN_PCOMTRUEW,
17846 IX86_BUILTIN_PCOMEQD,
17847 IX86_BUILTIN_PCOMNED,
17848 IX86_BUILTIN_PCOMLTD,
17849 IX86_BUILTIN_PCOMLED,
17850 IX86_BUILTIN_PCOMGTD,
17851 IX86_BUILTIN_PCOMGED,
17852 IX86_BUILTIN_PCOMFALSED,
17853 IX86_BUILTIN_PCOMTRUED,
17854 IX86_BUILTIN_PCOMEQQ,
17855 IX86_BUILTIN_PCOMNEQ,
17856 IX86_BUILTIN_PCOMLTQ,
17857 IX86_BUILTIN_PCOMLEQ,
17858 IX86_BUILTIN_PCOMGTQ,
17859 IX86_BUILTIN_PCOMGEQ,
17860 IX86_BUILTIN_PCOMFALSEQ,
17861 IX86_BUILTIN_PCOMTRUEQ,
17866 /* Table for the ix86 builtin decls. */
17867 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
17869 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
17870 * if the target_flags include one of MASK. Stores the function decl
17871 * in the ix86_builtins array.
17872 * Returns the function decl or NULL_TREE, if the builtin was not added. */
17875 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
17877 tree decl = NULL_TREE;
17879 if (mask & ix86_isa_flags
17880 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
17882 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
17884 ix86_builtins[(int) code] = decl;
17890 /* Like def_builtin, but also marks the function decl "const". */
17893 def_builtin_const (int mask, const char *name, tree type,
17894 enum ix86_builtins code)
17896 tree decl = def_builtin (mask, name, type, code);
17898 TREE_READONLY (decl) = 1;
17902 /* Bits for builtin_description.flag. */
17904 /* Set when we don't support the comparison natively, and should
17905 swap_comparison in order to support it. */
17906 #define BUILTIN_DESC_SWAP_OPERANDS 1
17908 struct builtin_description
17910 const unsigned int mask;
17911 const enum insn_code icode;
17912 const char *const name;
17913 const enum ix86_builtins code;
17914 const enum rtx_code comparison;
17918 static const struct builtin_description bdesc_comi[] =
17920 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17921 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17922 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17923 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17924 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17925 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17926 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17927 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17928 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17929 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17930 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17931 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17935 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17946 static const struct builtin_description bdesc_ptest[] =
17949 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17950 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17951 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17954 static const struct builtin_description bdesc_pcmpestr[] =
17957 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17958 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17959 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17960 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17966 static const struct builtin_description bdesc_pcmpistr[] =
17969 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17970 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17971 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17972 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17978 static const struct builtin_description bdesc_crc32[] =
17981 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17982 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17983 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17984 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17987 /* SSE builtins with 3 arguments and the last argument must be an immediate or xmm0. */
17988 static const struct builtin_description bdesc_sse_3arg[] =
17991 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, 0 },
17994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, 0 },
17997 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17998 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17999 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
18000 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
18001 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
18002 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
18003 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
18004 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
18005 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
18006 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
18007 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
18008 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
18011 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, 0 },
18014 static const struct builtin_description bdesc_2arg[] =
18017 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
18018 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
18019 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
18020 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
18021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
18022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
18023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
18024 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
18026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
18027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
18028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
18029 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
18030 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
18031 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
18032 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
18033 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
18034 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
18035 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18036 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18037 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
18038 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
18039 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
18040 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
18041 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
18042 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
18043 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
18044 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
18045 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18046 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18047 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
18049 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
18050 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
18051 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
18052 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
18054 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
18055 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
18056 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
18057 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
18059 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
18060 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
18061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
18062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
18063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
18066 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
18067 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
18068 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
18069 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
18070 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
18071 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
18072 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
18073 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
18075 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
18076 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
18077 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
18078 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
18079 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
18080 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
18081 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
18082 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
18084 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
18085 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
18086 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
18088 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
18089 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
18090 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
18091 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
18093 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
18094 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
18096 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
18097 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
18098 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
18099 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
18100 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
18101 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
18103 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
18104 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
18105 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
18106 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
18108 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
18109 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
18110 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
18111 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
18112 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
18113 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
18116 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
18117 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
18118 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
18120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
18121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
18122 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
18124 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
18125 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
18128 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
18129 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
18130 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
18131 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
18134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
18135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
18137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
18138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
18139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
18140 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
18141 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
18142 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
18143 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
18144 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
18145 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
18146 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
18147 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
18148 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
18149 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
18150 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
18151 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
18152 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
18153 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
18154 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
18155 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
18156 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
18158 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
18159 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
18160 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
18161 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
18163 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
18164 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
18165 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
18166 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
18168 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
18169 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
18170 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
18172 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
18175 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
18176 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
18177 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
18178 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
18179 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
18180 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
18181 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
18182 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
18184 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
18185 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
18186 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
18187 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
18188 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
18189 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
18190 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
18191 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
18193 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
18194 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
18196 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
18197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
18198 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
18199 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
18201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
18202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
18204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
18205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
18206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
18207 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
18208 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
18209 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
18211 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
18212 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
18216 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
18217 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
18218 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
18219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
18220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
18221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
18222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
18223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
18227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
18229 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
18230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
18232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
18233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
18235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
18237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
18238 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
18239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
18240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
18243 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
18244 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
18245 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
18246 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
18247 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
18248 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
18251 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
18252 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
18253 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
18254 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
18255 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
18256 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
18257 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
18258 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
18259 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
18260 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
18261 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
18262 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
18263 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
18264 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
18265 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
18266 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
18267 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
18268 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
18269 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
18270 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
18271 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
18272 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
18273 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
18274 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
18277 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
18278 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
18279 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
18280 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
18281 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
18282 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
18283 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
18284 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
18285 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
18286 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
18287 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
18288 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
18291 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
18294 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, 0 },
18295 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, 0 },
18296 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, 0 },
18297 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, 0 },
18298 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, 0 },
18301 static const struct builtin_description bdesc_1arg[] =
18304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
18305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
18307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
18308 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS_NR, UNKNOWN, 0 },
18309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
18310 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, 0 },
18311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
18313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
18314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
18315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
18316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
18317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
18318 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
18321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
18322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
18324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
18326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
18327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
18329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
18330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
18331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
18332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
18333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
18335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
18337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
18338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
18339 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
18340 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
18342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
18343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
18344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
18347 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
18348 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
18351 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
18352 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
18353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
18354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
18355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
18356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
18359 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
18360 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
18361 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
18362 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
18363 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
18364 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
18365 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
18366 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
18367 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
18368 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
18369 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
18370 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
18371 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
18373 /* Fake 1 arg builtins with a constant smaller than 8 bits as the 2nd arg. */
18374 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
18375 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
18378 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, 0 },
18382 enum multi_arg_type {
18392 MULTI_ARG_3_PERMPS,
18393 MULTI_ARG_3_PERMPD,
18400 MULTI_ARG_2_DI_IMM,
18401 MULTI_ARG_2_SI_IMM,
18402 MULTI_ARG_2_HI_IMM,
18403 MULTI_ARG_2_QI_IMM,
18404 MULTI_ARG_2_SF_CMP,
18405 MULTI_ARG_2_DF_CMP,
18406 MULTI_ARG_2_DI_CMP,
18407 MULTI_ARG_2_SI_CMP,
18408 MULTI_ARG_2_HI_CMP,
18409 MULTI_ARG_2_QI_CMP,
18432 static const struct builtin_description bdesc_multi_arg[] =
18434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18436 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18444 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18453 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18462 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18471 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18472 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18473 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18474 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18475 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18476 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18477 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18478 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18479 { OPTION_MASK_ISA_SSE5, CODE_FOR_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18480 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18481 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18482 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18483 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18484 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18485 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18486 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18487 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18488 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18489 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18490 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18491 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18492 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18493 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18494 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18495 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18496 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18497 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18498 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18499 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18500 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18501 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18502 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18503 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18504 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18505 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18506 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18507 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18508 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18510 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18511 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18512 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18513 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18514 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18515 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18516 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18517 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18518 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18519 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18520 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18521 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18522 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18523 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18524 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18525 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18527 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18528 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18529 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18530 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18531 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18532 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18533 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18534 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18535 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18536 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18537 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18538 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18539 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18540 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18541 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18542 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18544 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18545 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18546 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18547 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18548 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18549 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18550 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18551 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18552 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18553 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18554 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18555 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18556 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18557 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18558 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18559 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18561 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
18562 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18563 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
18564 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
18565 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
18566 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
18567 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
18568 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18569 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18570 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18571 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18572 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18573 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18574 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18575 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18576 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18578 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
18579 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18580 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
18581 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
18582 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
18583 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
18584 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
18586 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
18587 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18588 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
18589 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
18590 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
18591 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
18592 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
18594 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
18595 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18596 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
18597 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
18598 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
18599 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
18600 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
18602 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18603 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18604 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
18605 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
18606 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
18607 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
18608 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
18610 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
18611 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18612 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
18613 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
18614 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
18615 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
18616 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
18618 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
18619 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18620 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
18621 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
18622 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
18623 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
18624 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
18626 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
18627 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18628 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
18629 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
18630 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
18631 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
18632 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
18634 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
18635 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18636 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
18637 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
18638 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
18639 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
18640 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
18642 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
18643 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
18644 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
18645 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
18646 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
18647 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
18648 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
18649 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
18651 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18652 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18653 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18654 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18655 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
18656 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
18657 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
18658 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
18660 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18661 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18662 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18663 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18664 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
18665 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
18666 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
18667 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
18670 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
18671 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
18674 ix86_init_mmx_sse_builtins (void)
18676 const struct builtin_description * d;
18679 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
18680 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
18681 tree V1DI_type_node
18682 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
18683 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
18684 tree V2DI_type_node
18685 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
18686 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
18687 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
18688 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
18689 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
18690 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
18691 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
18693 tree pchar_type_node = build_pointer_type (char_type_node);
18694 tree pcchar_type_node = build_pointer_type (
18695 build_type_variant (char_type_node, 1, 0));
18696 tree pfloat_type_node = build_pointer_type (float_type_node);
18697 tree pcfloat_type_node = build_pointer_type (
18698 build_type_variant (float_type_node, 1, 0));
18699 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
18700 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
18701 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
18704 tree int_ftype_v4sf_v4sf
18705 = build_function_type_list (integer_type_node,
18706 V4SF_type_node, V4SF_type_node, NULL_TREE);
18707 tree v4si_ftype_v4sf_v4sf
18708 = build_function_type_list (V4SI_type_node,
18709 V4SF_type_node, V4SF_type_node, NULL_TREE);
18710 /* MMX/SSE/integer conversions. */
18711 tree int_ftype_v4sf
18712 = build_function_type_list (integer_type_node,
18713 V4SF_type_node, NULL_TREE);
18714 tree int64_ftype_v4sf
18715 = build_function_type_list (long_long_integer_type_node,
18716 V4SF_type_node, NULL_TREE);
18717 tree int_ftype_v8qi
18718 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
18719 tree v4sf_ftype_v4sf_int
18720 = build_function_type_list (V4SF_type_node,
18721 V4SF_type_node, integer_type_node, NULL_TREE);
18722 tree v4sf_ftype_v4sf_int64
18723 = build_function_type_list (V4SF_type_node,
18724 V4SF_type_node, long_long_integer_type_node,
18726 tree v4sf_ftype_v4sf_v2si
18727 = build_function_type_list (V4SF_type_node,
18728 V4SF_type_node, V2SI_type_node, NULL_TREE);
18730 /* Miscellaneous. */
18731 tree v8qi_ftype_v4hi_v4hi
18732 = build_function_type_list (V8QI_type_node,
18733 V4HI_type_node, V4HI_type_node, NULL_TREE);
18734 tree v4hi_ftype_v2si_v2si
18735 = build_function_type_list (V4HI_type_node,
18736 V2SI_type_node, V2SI_type_node, NULL_TREE);
18737 tree v4sf_ftype_v4sf_v4sf_int
18738 = build_function_type_list (V4SF_type_node,
18739 V4SF_type_node, V4SF_type_node,
18740 integer_type_node, NULL_TREE);
18741 tree v2si_ftype_v4hi_v4hi
18742 = build_function_type_list (V2SI_type_node,
18743 V4HI_type_node, V4HI_type_node, NULL_TREE);
18744 tree v4hi_ftype_v4hi_int
18745 = build_function_type_list (V4HI_type_node,
18746 V4HI_type_node, integer_type_node, NULL_TREE);
18747 tree v2si_ftype_v2si_int
18748 = build_function_type_list (V2SI_type_node,
18749 V2SI_type_node, integer_type_node, NULL_TREE);
18750 tree v1di_ftype_v1di_int
18751 = build_function_type_list (V1DI_type_node,
18752 V1DI_type_node, integer_type_node, NULL_TREE);
18754 tree void_ftype_void
18755 = build_function_type (void_type_node, void_list_node);
18756 tree void_ftype_unsigned
18757 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
18758 tree void_ftype_unsigned_unsigned
18759 = build_function_type_list (void_type_node, unsigned_type_node,
18760 unsigned_type_node, NULL_TREE);
18761 tree void_ftype_pcvoid_unsigned_unsigned
18762 = build_function_type_list (void_type_node, const_ptr_type_node,
18763 unsigned_type_node, unsigned_type_node,
18765 tree unsigned_ftype_void
18766 = build_function_type (unsigned_type_node, void_list_node);
18767 tree v2si_ftype_v4sf
18768 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
18769 /* Loads/stores. */
18770 tree void_ftype_v8qi_v8qi_pchar
18771 = build_function_type_list (void_type_node,
18772 V8QI_type_node, V8QI_type_node,
18773 pchar_type_node, NULL_TREE);
18774 tree v4sf_ftype_pcfloat
18775 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
18776 /* @@@ the type is bogus */
18777 tree v4sf_ftype_v4sf_pv2si
18778 = build_function_type_list (V4SF_type_node,
18779 V4SF_type_node, pv2si_type_node, NULL_TREE);
18780 tree void_ftype_pv2si_v4sf
18781 = build_function_type_list (void_type_node,
18782 pv2si_type_node, V4SF_type_node, NULL_TREE);
18783 tree void_ftype_pfloat_v4sf
18784 = build_function_type_list (void_type_node,
18785 pfloat_type_node, V4SF_type_node, NULL_TREE);
18786 tree void_ftype_pdi_di
18787 = build_function_type_list (void_type_node,
18788 pdi_type_node, long_long_unsigned_type_node,
18790 tree void_ftype_pv2di_v2di
18791 = build_function_type_list (void_type_node,
18792 pv2di_type_node, V2DI_type_node, NULL_TREE);
18793 /* Normal vector unops. */
18794 tree v4sf_ftype_v4sf
18795 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
18796 tree v16qi_ftype_v16qi
18797 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
18798 tree v8hi_ftype_v8hi
18799 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
18800 tree v4si_ftype_v4si
18801 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
18802 tree v8qi_ftype_v8qi
18803 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
18804 tree v4hi_ftype_v4hi
18805 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
18807 /* Normal vector binops. */
18808 tree v4sf_ftype_v4sf_v4sf
18809 = build_function_type_list (V4SF_type_node,
18810 V4SF_type_node, V4SF_type_node, NULL_TREE);
18811 tree v8qi_ftype_v8qi_v8qi
18812 = build_function_type_list (V8QI_type_node,
18813 V8QI_type_node, V8QI_type_node, NULL_TREE);
18814 tree v4hi_ftype_v4hi_v4hi
18815 = build_function_type_list (V4HI_type_node,
18816 V4HI_type_node, V4HI_type_node, NULL_TREE);
18817 tree v2si_ftype_v2si_v2si
18818 = build_function_type_list (V2SI_type_node,
18819 V2SI_type_node, V2SI_type_node, NULL_TREE);
18820 tree v1di_ftype_v1di_v1di
18821 = build_function_type_list (V1DI_type_node,
18822 V1DI_type_node, V1DI_type_node, NULL_TREE);
18824 tree di_ftype_di_di_int
18825 = build_function_type_list (long_long_unsigned_type_node,
18826 long_long_unsigned_type_node,
18827 long_long_unsigned_type_node,
18828 integer_type_node, NULL_TREE);
18830 tree v2si_ftype_v2sf
18831 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
18832 tree v2sf_ftype_v2si
18833 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
18834 tree v2si_ftype_v2si
18835 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
18836 tree v2sf_ftype_v2sf
18837 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
18838 tree v2sf_ftype_v2sf_v2sf
18839 = build_function_type_list (V2SF_type_node,
18840 V2SF_type_node, V2SF_type_node, NULL_TREE);
18841 tree v2si_ftype_v2sf_v2sf
18842 = build_function_type_list (V2SI_type_node,
18843 V2SF_type_node, V2SF_type_node, NULL_TREE);
18844 tree pint_type_node = build_pointer_type (integer_type_node);
18845 tree pdouble_type_node = build_pointer_type (double_type_node);
18846 tree pcdouble_type_node = build_pointer_type (
18847 build_type_variant (double_type_node, 1, 0));
18848 tree int_ftype_v2df_v2df
18849 = build_function_type_list (integer_type_node,
18850 V2DF_type_node, V2DF_type_node, NULL_TREE);
18852 tree void_ftype_pcvoid
18853 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
18854 tree v4sf_ftype_v4si
18855 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
18856 tree v4si_ftype_v4sf
18857 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
18858 tree v2df_ftype_v4si
18859 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
18860 tree v4si_ftype_v2df
18861 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
18862 tree v4si_ftype_v2df_v2df
18863 = build_function_type_list (V4SI_type_node,
18864 V2DF_type_node, V2DF_type_node, NULL_TREE);
18865 tree v2si_ftype_v2df
18866 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
18867 tree v4sf_ftype_v2df
18868 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
18869 tree v2df_ftype_v2si
18870 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
18871 tree v2df_ftype_v4sf
18872 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
18873 tree int_ftype_v2df
18874 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
18875 tree int64_ftype_v2df
18876 = build_function_type_list (long_long_integer_type_node,
18877 V2DF_type_node, NULL_TREE);
18878 tree v2df_ftype_v2df_int
18879 = build_function_type_list (V2DF_type_node,
18880 V2DF_type_node, integer_type_node, NULL_TREE);
18881 tree v2df_ftype_v2df_int64
18882 = build_function_type_list (V2DF_type_node,
18883 V2DF_type_node, long_long_integer_type_node,
18885 tree v4sf_ftype_v4sf_v2df
18886 = build_function_type_list (V4SF_type_node,
18887 V4SF_type_node, V2DF_type_node, NULL_TREE);
18888 tree v2df_ftype_v2df_v4sf
18889 = build_function_type_list (V2DF_type_node,
18890 V2DF_type_node, V4SF_type_node, NULL_TREE);
18891 tree v2df_ftype_v2df_v2df_int
18892 = build_function_type_list (V2DF_type_node,
18893 V2DF_type_node, V2DF_type_node,
18896 tree v2df_ftype_v2df_pcdouble
18897 = build_function_type_list (V2DF_type_node,
18898 V2DF_type_node, pcdouble_type_node, NULL_TREE);
18899 tree void_ftype_pdouble_v2df
18900 = build_function_type_list (void_type_node,
18901 pdouble_type_node, V2DF_type_node, NULL_TREE);
18902 tree void_ftype_pint_int
18903 = build_function_type_list (void_type_node,
18904 pint_type_node, integer_type_node, NULL_TREE);
18905 tree void_ftype_v16qi_v16qi_pchar
18906 = build_function_type_list (void_type_node,
18907 V16QI_type_node, V16QI_type_node,
18908 pchar_type_node, NULL_TREE);
18909 tree v2df_ftype_pcdouble
18910 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
18911 tree v2df_ftype_v2df_v2df
18912 = build_function_type_list (V2DF_type_node,
18913 V2DF_type_node, V2DF_type_node, NULL_TREE);
18914 tree v16qi_ftype_v16qi_v16qi
18915 = build_function_type_list (V16QI_type_node,
18916 V16QI_type_node, V16QI_type_node, NULL_TREE);
18917 tree v8hi_ftype_v8hi_v8hi
18918 = build_function_type_list (V8HI_type_node,
18919 V8HI_type_node, V8HI_type_node, NULL_TREE);
18920 tree v4si_ftype_v4si_v4si
18921 = build_function_type_list (V4SI_type_node,
18922 V4SI_type_node, V4SI_type_node, NULL_TREE);
18923 tree v2di_ftype_v2di_v2di
18924 = build_function_type_list (V2DI_type_node,
18925 V2DI_type_node, V2DI_type_node, NULL_TREE);
18926 tree v2di_ftype_v2df_v2df
18927 = build_function_type_list (V2DI_type_node,
18928 V2DF_type_node, V2DF_type_node, NULL_TREE);
18929 tree v2df_ftype_v2df
18930 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
18931 tree v2di_ftype_v2di_int
18932 = build_function_type_list (V2DI_type_node,
18933 V2DI_type_node, integer_type_node, NULL_TREE);
18934 tree v2di_ftype_v2di_v2di_int
18935 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18936 V2DI_type_node, integer_type_node, NULL_TREE);
18937 tree v4si_ftype_v4si_int
18938 = build_function_type_list (V4SI_type_node,
18939 V4SI_type_node, integer_type_node, NULL_TREE);
18940 tree v8hi_ftype_v8hi_int
18941 = build_function_type_list (V8HI_type_node,
18942 V8HI_type_node, integer_type_node, NULL_TREE);
18943 tree v4si_ftype_v8hi_v8hi
18944 = build_function_type_list (V4SI_type_node,
18945 V8HI_type_node, V8HI_type_node, NULL_TREE);
18946 tree v1di_ftype_v8qi_v8qi
18947 = build_function_type_list (V1DI_type_node,
18948 V8QI_type_node, V8QI_type_node, NULL_TREE);
18949 tree v1di_ftype_v2si_v2si
18950 = build_function_type_list (V1DI_type_node,
18951 V2SI_type_node, V2SI_type_node, NULL_TREE);
18952 tree v2di_ftype_v16qi_v16qi
18953 = build_function_type_list (V2DI_type_node,
18954 V16QI_type_node, V16QI_type_node, NULL_TREE);
18955 tree v2di_ftype_v4si_v4si
18956 = build_function_type_list (V2DI_type_node,
18957 V4SI_type_node, V4SI_type_node, NULL_TREE);
18958 tree int_ftype_v16qi
18959 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
18960 tree v16qi_ftype_pcchar
18961 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
18962 tree void_ftype_pchar_v16qi
18963 = build_function_type_list (void_type_node,
18964 pchar_type_node, V16QI_type_node, NULL_TREE);
18966 tree v2di_ftype_v2di_unsigned_unsigned
18967 = build_function_type_list (V2DI_type_node, V2DI_type_node,
18968 unsigned_type_node, unsigned_type_node,
18970 tree v2di_ftype_v2di_v2di_unsigned_unsigned
18971 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
18972 unsigned_type_node, unsigned_type_node,
18974 tree v2di_ftype_v2di_v16qi
18975 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
18977 tree v2df_ftype_v2df_v2df_v2df
18978 = build_function_type_list (V2DF_type_node,
18979 V2DF_type_node, V2DF_type_node,
18980 V2DF_type_node, NULL_TREE);
18981 tree v4sf_ftype_v4sf_v4sf_v4sf
18982 = build_function_type_list (V4SF_type_node,
18983 V4SF_type_node, V4SF_type_node,
18984 V4SF_type_node, NULL_TREE);
18985 tree v8hi_ftype_v16qi
18986 = build_function_type_list (V8HI_type_node, V16QI_type_node,
18988 tree v4si_ftype_v16qi
18989 = build_function_type_list (V4SI_type_node, V16QI_type_node,
18991 tree v2di_ftype_v16qi
18992 = build_function_type_list (V2DI_type_node, V16QI_type_node,
18994 tree v4si_ftype_v8hi
18995 = build_function_type_list (V4SI_type_node, V8HI_type_node,
18997 tree v2di_ftype_v8hi
18998 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19000 tree v2di_ftype_v4si
19001 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19003 tree v2di_ftype_pv2di
19004 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19006 tree v16qi_ftype_v16qi_v16qi_int
19007 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19008 V16QI_type_node, integer_type_node,
19010 tree v16qi_ftype_v16qi_v16qi_v16qi
19011 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19012 V16QI_type_node, V16QI_type_node,
19014 tree v8hi_ftype_v8hi_v8hi_int
19015 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19016 V8HI_type_node, integer_type_node,
19018 tree v4si_ftype_v4si_v4si_int
19019 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19020 V4SI_type_node, integer_type_node,
19022 tree int_ftype_v2di_v2di
19023 = build_function_type_list (integer_type_node,
19024 V2DI_type_node, V2DI_type_node,
19026 tree int_ftype_v16qi_int_v16qi_int_int
19027 = build_function_type_list (integer_type_node,
19034 tree v16qi_ftype_v16qi_int_v16qi_int_int
19035 = build_function_type_list (V16QI_type_node,
19042 tree int_ftype_v16qi_v16qi_int
19043 = build_function_type_list (integer_type_node,
19049 /* SSE5 instructions */
19050 tree v2di_ftype_v2di_v2di_v2di
19051 = build_function_type_list (V2DI_type_node,
19057 tree v4si_ftype_v4si_v4si_v4si
19058 = build_function_type_list (V4SI_type_node,
19064 tree v4si_ftype_v4si_v4si_v2di
19065 = build_function_type_list (V4SI_type_node,
19071 tree v8hi_ftype_v8hi_v8hi_v8hi
19072 = build_function_type_list (V8HI_type_node,
19078 tree v8hi_ftype_v8hi_v8hi_v4si
19079 = build_function_type_list (V8HI_type_node,
19085 tree v2df_ftype_v2df_v2df_v16qi
19086 = build_function_type_list (V2DF_type_node,
19092 tree v4sf_ftype_v4sf_v4sf_v16qi
19093 = build_function_type_list (V4SF_type_node,
19099 tree v2di_ftype_v2di_si
19100 = build_function_type_list (V2DI_type_node,
19105 tree v4si_ftype_v4si_si
19106 = build_function_type_list (V4SI_type_node,
19111 tree v8hi_ftype_v8hi_si
19112 = build_function_type_list (V8HI_type_node,
19117 tree v16qi_ftype_v16qi_si
19118 = build_function_type_list (V16QI_type_node,
19122 tree v4sf_ftype_v4hi
19123 = build_function_type_list (V4SF_type_node,
19127 tree v4hi_ftype_v4sf
19128 = build_function_type_list (V4HI_type_node,
19132 tree v2di_ftype_v2di
19133 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19137 /* The __float80 type. */
19138 if (TYPE_MODE (long_double_type_node) == XFmode)
19139 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19143 /* The __float80 type. */
19144 tree float80_type_node = make_node (REAL_TYPE);
19146 TYPE_PRECISION (float80_type_node) = 80;
19147 layout_type (float80_type_node);
19148 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19154 tree float128_type_node = make_node (REAL_TYPE);
19156 TYPE_PRECISION (float128_type_node) = 128;
19157 layout_type (float128_type_node);
19158 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19161 /* TFmode support builtins. */
19162 ftype = build_function_type (float128_type_node,
19164 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19166 ftype = build_function_type_list (float128_type_node,
19167 float128_type_node,
19169 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19171 ftype = build_function_type_list (float128_type_node,
19172 float128_type_node,
19173 float128_type_node,
19175 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19178 /* Add all SSE builtins that are more or less simple operations on
19180 for (i = 0, d = bdesc_sse_3arg;
19181 i < ARRAY_SIZE (bdesc_sse_3arg);
19184 /* Use one of the operands; the target can have a different mode for
19185 mask-generating compares. */
19186 enum machine_mode mode;
19191 mode = insn_data[d->icode].operand[1].mode;
19196 type = v16qi_ftype_v16qi_v16qi_int;
19199 type = v8hi_ftype_v8hi_v8hi_int;
19202 type = v4si_ftype_v4si_v4si_int;
19205 type = v2di_ftype_v2di_v2di_int;
19208 type = v2df_ftype_v2df_v2df_int;
19211 type = v4sf_ftype_v4sf_v4sf_int;
19214 gcc_unreachable ();
19217 /* Override for variable blends. */
19220 case CODE_FOR_sse4_1_blendvpd:
19221 type = v2df_ftype_v2df_v2df_v2df;
19223 case CODE_FOR_sse4_1_blendvps:
19224 type = v4sf_ftype_v4sf_v4sf_v4sf;
19226 case CODE_FOR_sse4_1_pblendvb:
19227 type = v16qi_ftype_v16qi_v16qi_v16qi;
19233 def_builtin_const (d->mask, d->name, type, d->code);
19236 /* Add all builtins that are more or less simple operations on two
19238 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19240 /* Use one of the operands; the target can have a different mode for
19241 mask-generating compares. */
19242 enum machine_mode mode;
19247 mode = insn_data[d->icode].operand[1].mode;
19252 type = v16qi_ftype_v16qi_v16qi;
19255 type = v8hi_ftype_v8hi_v8hi;
19258 type = v4si_ftype_v4si_v4si;
19261 type = v2di_ftype_v2di_v2di;
19264 type = v2df_ftype_v2df_v2df;
19267 type = v4sf_ftype_v4sf_v4sf;
19270 type = v8qi_ftype_v8qi_v8qi;
19273 type = v4hi_ftype_v4hi_v4hi;
19276 type = v2si_ftype_v2si_v2si;
19279 type = v1di_ftype_v1di_v1di;
19283 gcc_unreachable ();
19286 /* Override for comparisons. */
19287 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19288 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
19289 type = v4si_ftype_v4sf_v4sf;
19291 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
19292 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19293 type = v2di_ftype_v2df_v2df;
19295 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
19296 type = v4si_ftype_v2df_v2df;
19298 def_builtin_const (d->mask, d->name, type, d->code);
19301 /* Add all builtins that are more or less simple operations on 1 operand. */
19302 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19304 enum machine_mode mode;
19309 mode = insn_data[d->icode].operand[1].mode;
19314 type = v16qi_ftype_v16qi;
19317 type = v8hi_ftype_v8hi;
19320 type = v4si_ftype_v4si;
19323 type = v2df_ftype_v2df;
19326 type = v4sf_ftype_v4sf;
19329 type = v8qi_ftype_v8qi;
19332 type = v4hi_ftype_v4hi;
19335 type = v2si_ftype_v2si;
19342 def_builtin_const (d->mask, d->name, type, d->code);
19345 /* pcmpestr[im] insns. */
19346 for (i = 0, d = bdesc_pcmpestr;
19347 i < ARRAY_SIZE (bdesc_pcmpestr);
19350 if (d->code == IX86_BUILTIN_PCMPESTRM128)
19351 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
19353 ftype = int_ftype_v16qi_int_v16qi_int_int;
19354 def_builtin_const (d->mask, d->name, ftype, d->code);
19357 /* pcmpistr[im] insns. */
19358 for (i = 0, d = bdesc_pcmpistr;
19359 i < ARRAY_SIZE (bdesc_pcmpistr);
19362 if (d->code == IX86_BUILTIN_PCMPISTRM128)
19363 ftype = v16qi_ftype_v16qi_v16qi_int;
19365 ftype = int_ftype_v16qi_v16qi_int;
19366 def_builtin_const (d->mask, d->name, ftype, d->code);
19369 /* Add the remaining MMX insns with somewhat more complicated types. */
19370 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
19372 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSLLWI);
19373 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSLLDI);
19374 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSLLQI);
19375 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSLLW);
19376 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSLLD);
19377 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSLLQ);
19379 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlwi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRLWI);
19380 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrldi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRLDI);
19381 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlqi", v1di_ftype_v1di_int, IX86_BUILTIN_PSRLQI);
19382 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRLW);
19383 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRLD);
19384 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", v1di_ftype_v1di_v1di, IX86_BUILTIN_PSRLQ);
19386 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrawi", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSRAWI);
19387 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psradi", v2si_ftype_v2si_int, IX86_BUILTIN_PSRADI);
19388 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PSRAW);
19389 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_v2si, IX86_BUILTIN_PSRAD);
19391 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
19392 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
19394 /* comi/ucomi insns. */
19395 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19396 if (d->mask == OPTION_MASK_ISA_SSE2)
19397 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
19399 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
19402 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19403 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
19405 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
19406 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
19407 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
19409 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
19410 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
19411 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
19412 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
19413 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
19414 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
19415 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
19416 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
19417 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
19418 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
19419 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
19421 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
19423 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
19424 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
19426 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
19427 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
19428 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
19429 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
19431 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
19432 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
19433 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
19434 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
19436 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
19438 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", v1di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
19440 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
19441 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
19442 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
19443 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS_NR);
19444 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
19445 ftype = build_function_type_list (float_type_node,
19448 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
19449 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
19450 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps_nr", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS_NR);
19451 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
19453 /* Original 3DNow! */
19454 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
19455 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
19456 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
19457 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
19458 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
19459 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
19460 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
19461 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
19462 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
19463 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
19464 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
19465 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
19466 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
19467 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
19468 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
19469 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
19470 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
19471 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
19472 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
19473 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
19475 /* 3DNow! extension as used in the Athlon CPU. */
19476 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
19477 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
19478 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
19479 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
19480 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
19481 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
19484 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
19486 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
19487 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
19489 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
19490 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
19492 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
19493 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
19494 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
19495 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
19496 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
19498 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
19499 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
19500 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
19501 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
19503 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
19504 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
19506 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
19507 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
19509 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
19510 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
19511 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
19512 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
19513 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
19515 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
19517 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
19518 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
19519 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
19520 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
19522 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
19523 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
19524 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
19526 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
19527 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
19528 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
19529 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
19531 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
19532 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
19533 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
19535 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
19536 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
19538 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", v1di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
19539 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
19541 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
19542 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
19543 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
19544 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
19545 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
19546 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
19547 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
19549 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
19550 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
19551 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
19552 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
19553 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
19554 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
19555 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
19557 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
19558 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
19559 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
19560 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
19562 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
19564 /* Prescott New Instructions. */
19565 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
19566 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
19567 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
19570 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
19571 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
19574 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
19575 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
19576 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
19577 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
19578 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
19579 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
19580 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
19581 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
19582 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
19583 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
19584 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
19585 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
19586 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
19587 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
19589 /* SSE4.1 and SSE5 */
19590 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
19591 def_builtin_const (OPTION_MASK_ISA_ROUND, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
19594 ftype = build_function_type_list (unsigned_type_node,
19595 unsigned_type_node,
19596 unsigned_char_type_node,
19598 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
19599 ftype = build_function_type_list (unsigned_type_node,
19600 unsigned_type_node,
19601 short_unsigned_type_node,
19603 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
19604 ftype = build_function_type_list (unsigned_type_node,
19605 unsigned_type_node,
19606 unsigned_type_node,
19608 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
19609 ftype = build_function_type_list (long_long_unsigned_type_node,
19610 long_long_unsigned_type_node,
19611 long_long_unsigned_type_node,
19613 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
19618 /* Define AES built-in functions only if AES is enabled. */
19619 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
19620 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
19621 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
19622 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
19623 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
19624 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
19630 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
19631 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
19634 /* AMDFAM10 SSE4A New built-ins */
19635 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
19636 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
19637 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
19638 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
19639 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
19640 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
19642 /* Access to the vec_init patterns. */
19643 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
19644 integer_type_node, NULL_TREE);
19645 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
19647 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
19648 short_integer_type_node,
19649 short_integer_type_node,
19650 short_integer_type_node, NULL_TREE);
19651 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
19653 ftype = build_function_type_list (V8QI_type_node, char_type_node,
19654 char_type_node, char_type_node,
19655 char_type_node, char_type_node,
19656 char_type_node, char_type_node,
19657 char_type_node, NULL_TREE);
19658 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
19660 /* Access to the vec_extract patterns. */
19661 ftype = build_function_type_list (double_type_node, V2DF_type_node,
19662 integer_type_node, NULL_TREE);
19663 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
19665 ftype = build_function_type_list (long_long_integer_type_node,
19666 V2DI_type_node, integer_type_node,
19668 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
19670 ftype = build_function_type_list (float_type_node, V4SF_type_node,
19671 integer_type_node, NULL_TREE);
19672 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
19674 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
19675 integer_type_node, NULL_TREE);
19676 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
19678 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
19679 integer_type_node, NULL_TREE);
19680 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
19682 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
19683 integer_type_node, NULL_TREE);
19684 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
19686 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
19687 integer_type_node, NULL_TREE);
19688 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
19690 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
19691 integer_type_node, NULL_TREE);
19692 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
19694 /* Access to the vec_set patterns. */
19695 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
19697 integer_type_node, NULL_TREE);
19698 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
19700 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
19702 integer_type_node, NULL_TREE);
19703 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
19705 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
19707 integer_type_node, NULL_TREE);
19708 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
19710 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
19712 integer_type_node, NULL_TREE);
19713 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
19715 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
19717 integer_type_node, NULL_TREE);
19718 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
19720 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
19722 integer_type_node, NULL_TREE);
19723 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
19725 /* Add SSE5 multi-arg argument instructions */
19726 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
19728 tree mtype = NULL_TREE;
19733 switch ((enum multi_arg_type)d->flag)
19735 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
19736 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
19737 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
19738 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
19739 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
19740 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
19741 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
19742 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
19743 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
19744 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
19745 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
19746 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
19747 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
19748 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
19749 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
19750 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
19751 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
19752 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
19753 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
19754 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
19755 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
19756 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
19757 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
19758 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
19759 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
19760 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
19761 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
19762 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
19763 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
19764 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
19765 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
19766 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
19767 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
19768 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
19769 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
19770 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
19771 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
19772 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
19773 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
19774 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
19775 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
19776 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
19777 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
19778 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
19779 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
19780 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
19781 case MULTI_ARG_UNKNOWN:
19783 gcc_unreachable ();
19787 def_builtin_const (d->mask, d->name, mtype, d->code);
19792 ix86_init_builtins (void)
19795 ix86_init_mmx_sse_builtins ();
19798 /* Errors in the source file can cause expand_expr to return const0_rtx
19799 where we expect a vector. To avoid crashing, use one of the vector
19800 clear instructions. */
19802 safe_vector_operand (rtx x, enum machine_mode mode)
19804 if (x == const0_rtx)
19805 x = CONST0_RTX (mode);
19809 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
19810 4 operands. The third argument must be a constant smaller than 8
19814 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
19818 tree arg0 = CALL_EXPR_ARG (exp, 0);
19819 tree arg1 = CALL_EXPR_ARG (exp, 1);
19820 tree arg2 = CALL_EXPR_ARG (exp, 2);
19821 rtx op0 = expand_normal (arg0);
19822 rtx op1 = expand_normal (arg1);
19823 rtx op2 = expand_normal (arg2);
19824 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19825 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
19826 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
19827 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
19829 if (VECTOR_MODE_P (mode1))
19830 op0 = safe_vector_operand (op0, mode1);
19831 if (VECTOR_MODE_P (mode2))
19832 op1 = safe_vector_operand (op1, mode2);
19833 if (VECTOR_MODE_P (mode3))
19834 op2 = safe_vector_operand (op2, mode3);
19838 || GET_MODE (target) != tmode
19839 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19840 target = gen_reg_rtx (tmode);
19842 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19843 op0 = copy_to_mode_reg (mode1, op0);
19844 if ((optimize && !register_operand (op1, mode2))
19845 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
19846 op1 = copy_to_mode_reg (mode2, op1);
19848 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19851 case CODE_FOR_sse4_1_blendvpd:
19852 case CODE_FOR_sse4_1_blendvps:
19853 case CODE_FOR_sse4_1_pblendvb:
19854 op2 = copy_to_mode_reg (mode3, op2);
19857 case CODE_FOR_sse4_1_roundsd:
19858 case CODE_FOR_sse4_1_roundss:
19859 case CODE_FOR_sse4_1_blendps:
19860 error ("the third argument must be a 4-bit immediate");
19863 case CODE_FOR_sse4_1_blendpd:
19864 error ("the third argument must be a 2-bit immediate");
19868 error ("the third argument must be an 8-bit immediate");
19872 pat = GEN_FCN (icode) (target, op0, op1, op2);
19879 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
19882 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
19885 tree arg0 = CALL_EXPR_ARG (exp, 0);
19886 tree arg1 = CALL_EXPR_ARG (exp, 1);
19887 rtx op0 = expand_normal (arg0);
19888 rtx op1 = expand_normal (arg1);
19889 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19890 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19891 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19895 || GET_MODE (target) != tmode
19896 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19897 target = gen_reg_rtx (tmode);
19899 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19900 op0 = copy_to_mode_reg (mode0, op0);
19901 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19903 op1 = copy_to_reg (op1);
19904 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
19907 pat = GEN_FCN (icode) (target, op0, op1);
19914 /* Subroutine of ix86_expand_builtin to take care of binop insns
19915 with an immediate. */
19918 ix86_expand_binop_imm_builtin (enum insn_code icode, tree exp,
19922 tree arg0 = CALL_EXPR_ARG (exp, 0);
19923 tree arg1 = CALL_EXPR_ARG (exp, 1);
19924 rtx op0 = expand_normal (arg0);
19925 rtx op1 = expand_normal (arg1);
19926 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19927 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19928 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19930 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19932 op0 = copy_to_reg (op0);
19933 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
19936 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
19938 error ("the last operand must be an immediate");
19942 target = gen_reg_rtx (V2DImode);
19943 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target,
19952 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
19955 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
19958 tree arg0 = CALL_EXPR_ARG (exp, 0);
19959 tree arg1 = CALL_EXPR_ARG (exp, 1);
19960 rtx op0 = expand_normal (arg0);
19961 rtx op1 = expand_normal (arg1);
19962 enum machine_mode tmode = insn_data[icode].operand[0].mode;
19963 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
19964 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
19966 if (VECTOR_MODE_P (mode0))
19967 op0 = safe_vector_operand (op0, mode0);
19968 if (VECTOR_MODE_P (mode1))
19969 op1 = safe_vector_operand (op1, mode1);
19971 if (optimize || !target
19972 || GET_MODE (target) != tmode
19973 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19974 target = gen_reg_rtx (tmode);
19976 if (GET_MODE (op1) == SImode && mode1 == TImode)
19978 rtx x = gen_reg_rtx (V4SImode);
19979 emit_insn (gen_sse2_loadd (x, op1));
19980 op1 = gen_lowpart (TImode, x);
19983 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
19984 op0 = copy_to_mode_reg (mode0, op0);
19985 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
19986 op1 = copy_to_mode_reg (mode1, op1);
19988 /* ??? Using ix86_fixup_binary_operands is problematic when
19989 we've got mismatched modes. Fake it. */
19995 if (tmode == mode0 && tmode == mode1)
19997 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
20001 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
20003 op0 = force_reg (mode0, op0);
20004 op1 = force_reg (mode1, op1);
20005 target = gen_reg_rtx (tmode);
20008 pat = GEN_FCN (icode) (target, op0, op1);
20015 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20018 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20019 enum multi_arg_type m_type,
20020 enum insn_code sub_code)
20025 bool comparison_p = false;
20027 bool last_arg_constant = false;
20028 int num_memory = 0;
20031 enum machine_mode mode;
20034 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20038 case MULTI_ARG_3_SF:
20039 case MULTI_ARG_3_DF:
20040 case MULTI_ARG_3_DI:
20041 case MULTI_ARG_3_SI:
20042 case MULTI_ARG_3_SI_DI:
20043 case MULTI_ARG_3_HI:
20044 case MULTI_ARG_3_HI_SI:
20045 case MULTI_ARG_3_QI:
20046 case MULTI_ARG_3_PERMPS:
20047 case MULTI_ARG_3_PERMPD:
20051 case MULTI_ARG_2_SF:
20052 case MULTI_ARG_2_DF:
20053 case MULTI_ARG_2_DI:
20054 case MULTI_ARG_2_SI:
20055 case MULTI_ARG_2_HI:
20056 case MULTI_ARG_2_QI:
20060 case MULTI_ARG_2_DI_IMM:
20061 case MULTI_ARG_2_SI_IMM:
20062 case MULTI_ARG_2_HI_IMM:
20063 case MULTI_ARG_2_QI_IMM:
20065 last_arg_constant = true;
20068 case MULTI_ARG_1_SF:
20069 case MULTI_ARG_1_DF:
20070 case MULTI_ARG_1_DI:
20071 case MULTI_ARG_1_SI:
20072 case MULTI_ARG_1_HI:
20073 case MULTI_ARG_1_QI:
20074 case MULTI_ARG_1_SI_DI:
20075 case MULTI_ARG_1_HI_DI:
20076 case MULTI_ARG_1_HI_SI:
20077 case MULTI_ARG_1_QI_DI:
20078 case MULTI_ARG_1_QI_SI:
20079 case MULTI_ARG_1_QI_HI:
20080 case MULTI_ARG_1_PH2PS:
20081 case MULTI_ARG_1_PS2PH:
20085 case MULTI_ARG_2_SF_CMP:
20086 case MULTI_ARG_2_DF_CMP:
20087 case MULTI_ARG_2_DI_CMP:
20088 case MULTI_ARG_2_SI_CMP:
20089 case MULTI_ARG_2_HI_CMP:
20090 case MULTI_ARG_2_QI_CMP:
20092 comparison_p = true;
20095 case MULTI_ARG_2_SF_TF:
20096 case MULTI_ARG_2_DF_TF:
20097 case MULTI_ARG_2_DI_TF:
20098 case MULTI_ARG_2_SI_TF:
20099 case MULTI_ARG_2_HI_TF:
20100 case MULTI_ARG_2_QI_TF:
20105 case MULTI_ARG_UNKNOWN:
20107 gcc_unreachable ();
20110 if (optimize || !target
20111 || GET_MODE (target) != tmode
20112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20113 target = gen_reg_rtx (tmode);
20115 gcc_assert (nargs <= 4);
20117 for (i = 0; i < nargs; i++)
20119 tree arg = CALL_EXPR_ARG (exp, i);
20120 rtx op = expand_normal (arg);
20121 int adjust = (comparison_p) ? 1 : 0;
20122 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
20124 if (last_arg_constant && i == nargs-1)
20126 if (GET_CODE (op) != CONST_INT)
20128 error ("last argument must be an immediate");
20129 return gen_reg_rtx (tmode);
20134 if (VECTOR_MODE_P (mode))
20135 op = safe_vector_operand (op, mode);
20137 /* If we aren't optimizing, only allow one memory operand to be
20139 if (memory_operand (op, mode))
20142 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20145 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20147 op = force_reg (mode, op);
20151 args[i].mode = mode;
20157 pat = GEN_FCN (icode) (target, args[0].op);
20162 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20163 GEN_INT ((int)sub_code));
20164 else if (! comparison_p)
20165 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
20168 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20172 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20177 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20181 gcc_unreachable ();
20191 /* Subroutine of ix86_expand_builtin to take care of stores. */
20194 ix86_expand_store_builtin (enum insn_code icode, tree exp)
20197 tree arg0 = CALL_EXPR_ARG (exp, 0);
20198 tree arg1 = CALL_EXPR_ARG (exp, 1);
20199 rtx op0 = expand_normal (arg0);
20200 rtx op1 = expand_normal (arg1);
20201 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
20202 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
20204 if (VECTOR_MODE_P (mode1))
20205 op1 = safe_vector_operand (op1, mode1);
20207 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20208 op1 = copy_to_mode_reg (mode1, op1);
20210 pat = GEN_FCN (icode) (op0, op1);
20216 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
20219 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
20220 rtx target, int do_load)
20223 tree arg0 = CALL_EXPR_ARG (exp, 0);
20224 rtx op0 = expand_normal (arg0);
20225 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20226 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20228 if (optimize || !target
20229 || GET_MODE (target) != tmode
20230 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20231 target = gen_reg_rtx (tmode);
20233 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20236 if (VECTOR_MODE_P (mode0))
20237 op0 = safe_vector_operand (op0, mode0);
20239 if ((optimize && !register_operand (op0, mode0))
20240 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20241 op0 = copy_to_mode_reg (mode0, op0);
20246 case CODE_FOR_sse4_1_roundpd:
20247 case CODE_FOR_sse4_1_roundps:
20249 tree arg1 = CALL_EXPR_ARG (exp, 1);
20250 rtx op1 = expand_normal (arg1);
20251 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
20253 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
20255 error ("the second argument must be a 4-bit immediate");
20258 pat = GEN_FCN (icode) (target, op0, op1);
20262 pat = GEN_FCN (icode) (target, op0);
20272 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
20273 sqrtss, rsqrtss, rcpss. */
20276 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
20279 tree arg0 = CALL_EXPR_ARG (exp, 0);
20280 rtx op1, op0 = expand_normal (arg0);
20281 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20282 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20284 if (optimize || !target
20285 || GET_MODE (target) != tmode
20286 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20287 target = gen_reg_rtx (tmode);
20289 if (VECTOR_MODE_P (mode0))
20290 op0 = safe_vector_operand (op0, mode0);
20292 if ((optimize && !register_operand (op0, mode0))
20293 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20294 op0 = copy_to_mode_reg (mode0, op0);
20297 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20298 op1 = copy_to_mode_reg (mode0, op1);
20300 pat = GEN_FCN (icode) (target, op0, op1);
20307 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20310 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
20314 tree arg0 = CALL_EXPR_ARG (exp, 0);
20315 tree arg1 = CALL_EXPR_ARG (exp, 1);
20316 rtx op0 = expand_normal (arg0);
20317 rtx op1 = expand_normal (arg1);
20319 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20320 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20321 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20322 enum rtx_code comparison = d->comparison;
20324 if (VECTOR_MODE_P (mode0))
20325 op0 = safe_vector_operand (op0, mode0);
20326 if (VECTOR_MODE_P (mode1))
20327 op1 = safe_vector_operand (op1, mode1);
20329 /* Swap operands if we have a comparison that isn't available in
20331 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20333 rtx tmp = gen_reg_rtx (mode1);
20334 emit_move_insn (tmp, op1);
20339 if (optimize || !target
20340 || GET_MODE (target) != tmode
20341 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20342 target = gen_reg_rtx (tmode);
20344 if ((optimize && !register_operand (op0, mode0))
20345 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20346 op0 = copy_to_mode_reg (mode0, op0);
20347 if ((optimize && !register_operand (op1, mode1))
20348 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20349 op1 = copy_to_mode_reg (mode1, op1);
20351 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20352 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20359 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20362 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20366 tree arg0 = CALL_EXPR_ARG (exp, 0);
20367 tree arg1 = CALL_EXPR_ARG (exp, 1);
20368 rtx op0 = expand_normal (arg0);
20369 rtx op1 = expand_normal (arg1);
20370 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20371 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20372 enum rtx_code comparison = d->comparison;
20374 if (VECTOR_MODE_P (mode0))
20375 op0 = safe_vector_operand (op0, mode0);
20376 if (VECTOR_MODE_P (mode1))
20377 op1 = safe_vector_operand (op1, mode1);
20379 /* Swap operands if we have a comparison that isn't available in
20381 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
20388 target = gen_reg_rtx (SImode);
20389 emit_move_insn (target, const0_rtx);
20390 target = gen_rtx_SUBREG (QImode, target, 0);
20392 if ((optimize && !register_operand (op0, mode0))
20393 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20394 op0 = copy_to_mode_reg (mode0, op0);
20395 if ((optimize && !register_operand (op1, mode1))
20396 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20397 op1 = copy_to_mode_reg (mode1, op1);
20399 pat = GEN_FCN (d->icode) (op0, op1);
20403 emit_insn (gen_rtx_SET (VOIDmode,
20404 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20405 gen_rtx_fmt_ee (comparison, QImode,
20409 return SUBREG_REG (target);
20412 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20415 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20419 tree arg0 = CALL_EXPR_ARG (exp, 0);
20420 tree arg1 = CALL_EXPR_ARG (exp, 1);
20421 rtx op0 = expand_normal (arg0);
20422 rtx op1 = expand_normal (arg1);
20423 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20424 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20425 enum rtx_code comparison = d->comparison;
20427 if (VECTOR_MODE_P (mode0))
20428 op0 = safe_vector_operand (op0, mode0);
20429 if (VECTOR_MODE_P (mode1))
20430 op1 = safe_vector_operand (op1, mode1);
20432 target = gen_reg_rtx (SImode);
20433 emit_move_insn (target, const0_rtx);
20434 target = gen_rtx_SUBREG (QImode, target, 0);
20436 if ((optimize && !register_operand (op0, mode0))
20437 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20438 op0 = copy_to_mode_reg (mode0, op0);
20439 if ((optimize && !register_operand (op1, mode1))
20440 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20441 op1 = copy_to_mode_reg (mode1, op1);
20443 pat = GEN_FCN (d->icode) (op0, op1);
20447 emit_insn (gen_rtx_SET (VOIDmode,
20448 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20449 gen_rtx_fmt_ee (comparison, QImode,
20453 return SUBREG_REG (target);
20456 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
20459 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20460 tree exp, rtx target)
20463 tree arg0 = CALL_EXPR_ARG (exp, 0);
20464 tree arg1 = CALL_EXPR_ARG (exp, 1);
20465 tree arg2 = CALL_EXPR_ARG (exp, 2);
20466 tree arg3 = CALL_EXPR_ARG (exp, 3);
20467 tree arg4 = CALL_EXPR_ARG (exp, 4);
20468 rtx scratch0, scratch1;
20469 rtx op0 = expand_normal (arg0);
20470 rtx op1 = expand_normal (arg1);
20471 rtx op2 = expand_normal (arg2);
20472 rtx op3 = expand_normal (arg3);
20473 rtx op4 = expand_normal (arg4);
20474 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
20476 tmode0 = insn_data[d->icode].operand[0].mode;
20477 tmode1 = insn_data[d->icode].operand[1].mode;
20478 modev2 = insn_data[d->icode].operand[2].mode;
20479 modei3 = insn_data[d->icode].operand[3].mode;
20480 modev4 = insn_data[d->icode].operand[4].mode;
20481 modei5 = insn_data[d->icode].operand[5].mode;
20482 modeimm = insn_data[d->icode].operand[6].mode;
20484 if (VECTOR_MODE_P (modev2))
20485 op0 = safe_vector_operand (op0, modev2);
20486 if (VECTOR_MODE_P (modev4))
20487 op2 = safe_vector_operand (op2, modev4);
20489 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20490 op0 = copy_to_mode_reg (modev2, op0);
20491 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20492 op1 = copy_to_mode_reg (modei3, op1);
20493 if ((optimize && !register_operand (op2, modev4))
20494 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20495 op2 = copy_to_mode_reg (modev4, op2);
20496 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20497 op3 = copy_to_mode_reg (modei5, op3);
20499 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20501 error ("the fifth argument must be a 8-bit immediate");
20505 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20507 if (optimize || !target
20508 || GET_MODE (target) != tmode0
20509 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20510 target = gen_reg_rtx (tmode0);
20512 scratch1 = gen_reg_rtx (tmode1);
20514 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20516 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20518 if (optimize || !target
20519 || GET_MODE (target) != tmode1
20520 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20521 target = gen_reg_rtx (tmode1);
20523 scratch0 = gen_reg_rtx (tmode0);
20525 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
20529 gcc_assert (d->flag);
20531 scratch0 = gen_reg_rtx (tmode0);
20532 scratch1 = gen_reg_rtx (tmode1);
20534 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
20544 target = gen_reg_rtx (SImode);
20545 emit_move_insn (target, const0_rtx);
20546 target = gen_rtx_SUBREG (QImode, target, 0);
20549 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20550 gen_rtx_fmt_ee (EQ, QImode,
20551 gen_rtx_REG ((enum machine_mode) d->flag,
20554 return SUBREG_REG (target);
20561 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
20564 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20565 tree exp, rtx target)
20568 tree arg0 = CALL_EXPR_ARG (exp, 0);
20569 tree arg1 = CALL_EXPR_ARG (exp, 1);
20570 tree arg2 = CALL_EXPR_ARG (exp, 2);
20571 rtx scratch0, scratch1;
20572 rtx op0 = expand_normal (arg0);
20573 rtx op1 = expand_normal (arg1);
20574 rtx op2 = expand_normal (arg2);
20575 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
20577 tmode0 = insn_data[d->icode].operand[0].mode;
20578 tmode1 = insn_data[d->icode].operand[1].mode;
20579 modev2 = insn_data[d->icode].operand[2].mode;
20580 modev3 = insn_data[d->icode].operand[3].mode;
20581 modeimm = insn_data[d->icode].operand[4].mode;
20583 if (VECTOR_MODE_P (modev2))
20584 op0 = safe_vector_operand (op0, modev2);
20585 if (VECTOR_MODE_P (modev3))
20586 op1 = safe_vector_operand (op1, modev3);
20588 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20589 op0 = copy_to_mode_reg (modev2, op0);
20590 if ((optimize && !register_operand (op1, modev3))
20591 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20592 op1 = copy_to_mode_reg (modev3, op1);
20594 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20596 error ("the third argument must be a 8-bit immediate");
20600 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20602 if (optimize || !target
20603 || GET_MODE (target) != tmode0
20604 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20605 target = gen_reg_rtx (tmode0);
20607 scratch1 = gen_reg_rtx (tmode1);
20609 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20611 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20613 if (optimize || !target
20614 || GET_MODE (target) != tmode1
20615 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20616 target = gen_reg_rtx (tmode1);
20618 scratch0 = gen_reg_rtx (tmode0);
20620 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
20624 gcc_assert (d->flag);
20626 scratch0 = gen_reg_rtx (tmode0);
20627 scratch1 = gen_reg_rtx (tmode1);
20629 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
20639 target = gen_reg_rtx (SImode);
20640 emit_move_insn (target, const0_rtx);
20641 target = gen_rtx_SUBREG (QImode, target, 0);
20644 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20645 gen_rtx_fmt_ee (EQ, QImode,
20646 gen_rtx_REG ((enum machine_mode) d->flag,
20649 return SUBREG_REG (target);
20655 /* Return the integer constant in ARG. Constrain it to be in the range
20656 of the subparts of VEC_TYPE; issue an error if not. */
20659 get_element_number (tree vec_type, tree arg)
20661 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
20663 if (!host_integerp (arg, 1)
20664 || (elt = tree_low_cst (arg, 1), elt > max))
20666 error ("selector must be an integer constant in the range 0..%wi", max);
20673 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20674 ix86_expand_vector_init. We DO have language-level syntax for this, in
20675 the form of (type){ init-list }. Except that since we can't place emms
20676 instructions from inside the compiler, we can't allow the use of MMX
20677 registers unless the user explicitly asks for it. So we do *not* define
20678 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
20679 we have builtins invoked by mmintrin.h that gives us license to emit
20680 these sorts of instructions. */
20683 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
20685 enum machine_mode tmode = TYPE_MODE (type);
20686 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
20687 int i, n_elt = GET_MODE_NUNITS (tmode);
20688 rtvec v = rtvec_alloc (n_elt);
20690 gcc_assert (VECTOR_MODE_P (tmode));
20691 gcc_assert (call_expr_nargs (exp) == n_elt);
20693 for (i = 0; i < n_elt; ++i)
20695 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
20696 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
20699 if (!target || !register_operand (target, tmode))
20700 target = gen_reg_rtx (tmode);
20702 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
20706 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20707 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
20708 had a language-level syntax for referencing vector elements. */
20711 ix86_expand_vec_ext_builtin (tree exp, rtx target)
20713 enum machine_mode tmode, mode0;
20718 arg0 = CALL_EXPR_ARG (exp, 0);
20719 arg1 = CALL_EXPR_ARG (exp, 1);
20721 op0 = expand_normal (arg0);
20722 elt = get_element_number (TREE_TYPE (arg0), arg1);
20724 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20725 mode0 = TYPE_MODE (TREE_TYPE (arg0));
20726 gcc_assert (VECTOR_MODE_P (mode0));
20728 op0 = force_reg (mode0, op0);
20730 if (optimize || !target || !register_operand (target, tmode))
20731 target = gen_reg_rtx (tmode);
20733 ix86_expand_vector_extract (true, target, op0, elt);
20738 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
20739 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
20740 a language-level syntax for referencing vector elements. */
20743 ix86_expand_vec_set_builtin (tree exp)
20745 enum machine_mode tmode, mode1;
20746 tree arg0, arg1, arg2;
20748 rtx op0, op1, target;
20750 arg0 = CALL_EXPR_ARG (exp, 0);
20751 arg1 = CALL_EXPR_ARG (exp, 1);
20752 arg2 = CALL_EXPR_ARG (exp, 2);
20754 tmode = TYPE_MODE (TREE_TYPE (arg0));
20755 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
20756 gcc_assert (VECTOR_MODE_P (tmode));
20758 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
20759 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
20760 elt = get_element_number (TREE_TYPE (arg0), arg2);
20762 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
20763 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
20765 op0 = force_reg (tmode, op0);
20766 op1 = force_reg (mode1, op1);
20768 /* OP0 is the source of these builtin functions and shouldn't be
20769 modified. Create a copy, use it and return it as target. */
20770 target = gen_reg_rtx (tmode);
20771 emit_move_insn (target, op0);
20772 ix86_expand_vector_set (true, target, op1, elt);
20777 /* Expand an expression EXP that calls a built-in function,
20778 with result going to TARGET if that's convenient
20779 (and in mode MODE if that's convenient).
20780 SUBTARGET may be used as the target for computing one of EXP's operands.
20781 IGNORE is nonzero if the value is to be ignored. */
20784 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
20785 enum machine_mode mode ATTRIBUTE_UNUSED,
20786 int ignore ATTRIBUTE_UNUSED)
20788 const struct builtin_description *d;
20790 enum insn_code icode;
20791 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
20792 tree arg0, arg1, arg2, arg3;
20793 rtx op0, op1, op2, op3, pat;
20794 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
20795 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
20799 case IX86_BUILTIN_EMMS:
20800 emit_insn (gen_mmx_emms ());
20803 case IX86_BUILTIN_SFENCE:
20804 emit_insn (gen_sse_sfence ());
20807 case IX86_BUILTIN_MASKMOVQ:
20808 case IX86_BUILTIN_MASKMOVDQU:
20809 icode = (fcode == IX86_BUILTIN_MASKMOVQ
20810 ? CODE_FOR_mmx_maskmovq
20811 : CODE_FOR_sse2_maskmovdqu);
20812 /* Note the arg order is different from the operand order. */
20813 arg1 = CALL_EXPR_ARG (exp, 0);
20814 arg2 = CALL_EXPR_ARG (exp, 1);
20815 arg0 = CALL_EXPR_ARG (exp, 2);
20816 op0 = expand_normal (arg0);
20817 op1 = expand_normal (arg1);
20818 op2 = expand_normal (arg2);
20819 mode0 = insn_data[icode].operand[0].mode;
20820 mode1 = insn_data[icode].operand[1].mode;
20821 mode2 = insn_data[icode].operand[2].mode;
20823 op0 = force_reg (Pmode, op0);
20824 op0 = gen_rtx_MEM (mode1, op0);
20826 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
20827 op0 = copy_to_mode_reg (mode0, op0);
20828 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
20829 op1 = copy_to_mode_reg (mode1, op1);
20830 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
20831 op2 = copy_to_mode_reg (mode2, op2);
20832 pat = GEN_FCN (icode) (op0, op1, op2);
20838 case IX86_BUILTIN_RSQRTF:
20839 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
20841 case IX86_BUILTIN_SQRTSS:
20842 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
20843 case IX86_BUILTIN_RSQRTSS:
20844 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
20845 case IX86_BUILTIN_RCPSS:
20846 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
20848 case IX86_BUILTIN_LOADUPS:
20849 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
20851 case IX86_BUILTIN_STOREUPS:
20852 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
20854 case IX86_BUILTIN_LOADHPS:
20855 case IX86_BUILTIN_LOADLPS:
20856 case IX86_BUILTIN_LOADHPD:
20857 case IX86_BUILTIN_LOADLPD:
20858 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
20859 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
20860 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
20861 : CODE_FOR_sse2_loadlpd);
20862 arg0 = CALL_EXPR_ARG (exp, 0);
20863 arg1 = CALL_EXPR_ARG (exp, 1);
20864 op0 = expand_normal (arg0);
20865 op1 = expand_normal (arg1);
20866 tmode = insn_data[icode].operand[0].mode;
20867 mode0 = insn_data[icode].operand[1].mode;
20868 mode1 = insn_data[icode].operand[2].mode;
20870 op0 = force_reg (mode0, op0);
20871 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
20872 if (optimize || target == 0
20873 || GET_MODE (target) != tmode
20874 || !register_operand (target, tmode))
20875 target = gen_reg_rtx (tmode);
20876 pat = GEN_FCN (icode) (target, op0, op1);
20882 case IX86_BUILTIN_STOREHPS:
20883 case IX86_BUILTIN_STORELPS:
20884 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
20885 : CODE_FOR_sse_storelps);
20886 arg0 = CALL_EXPR_ARG (exp, 0);
20887 arg1 = CALL_EXPR_ARG (exp, 1);
20888 op0 = expand_normal (arg0);
20889 op1 = expand_normal (arg1);
20890 mode0 = insn_data[icode].operand[0].mode;
20891 mode1 = insn_data[icode].operand[1].mode;
20893 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
20894 op1 = force_reg (mode1, op1);
20896 pat = GEN_FCN (icode) (op0, op1);
20902 case IX86_BUILTIN_MOVNTPS:
20903 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
20904 case IX86_BUILTIN_MOVNTQ:
20905 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
20907 case IX86_BUILTIN_LDMXCSR:
20908 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
20909 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20910 emit_move_insn (target, op0);
20911 emit_insn (gen_sse_ldmxcsr (target));
20914 case IX86_BUILTIN_STMXCSR:
20915 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
20916 emit_insn (gen_sse_stmxcsr (target));
20917 return copy_to_mode_reg (SImode, target);
20919 case IX86_BUILTIN_PSHUFW:
20920 case IX86_BUILTIN_PSHUFD:
20921 case IX86_BUILTIN_PSHUFHW:
20922 case IX86_BUILTIN_PSHUFLW:
20923 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
20924 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
20925 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
20926 : CODE_FOR_mmx_pshufw);
20927 arg0 = CALL_EXPR_ARG (exp, 0);
20928 arg1 = CALL_EXPR_ARG (exp, 1);
20929 op0 = expand_normal (arg0);
20930 op1 = expand_normal (arg1);
20931 tmode = insn_data[icode].operand[0].mode;
20932 mode1 = insn_data[icode].operand[1].mode;
20933 mode2 = insn_data[icode].operand[2].mode;
20935 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
20936 op0 = copy_to_mode_reg (mode1, op0);
20937 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
20939 /* @@@ better error message */
20940 error ("mask must be an immediate");
20944 || GET_MODE (target) != tmode
20945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20946 target = gen_reg_rtx (tmode);
20947 pat = GEN_FCN (icode) (target, op0, op1);
20953 case IX86_BUILTIN_PSLLW:
20954 case IX86_BUILTIN_PSLLWI:
20955 icode = CODE_FOR_mmx_ashlv4hi3;
20957 case IX86_BUILTIN_PSLLD:
20958 case IX86_BUILTIN_PSLLDI:
20959 icode = CODE_FOR_mmx_ashlv2si3;
20961 case IX86_BUILTIN_PSLLQ:
20962 case IX86_BUILTIN_PSLLQI:
20963 icode = CODE_FOR_mmx_ashlv1di3;
20965 case IX86_BUILTIN_PSRAW:
20966 case IX86_BUILTIN_PSRAWI:
20967 icode = CODE_FOR_mmx_ashrv4hi3;
20969 case IX86_BUILTIN_PSRAD:
20970 case IX86_BUILTIN_PSRADI:
20971 icode = CODE_FOR_mmx_ashrv2si3;
20973 case IX86_BUILTIN_PSRLW:
20974 case IX86_BUILTIN_PSRLWI:
20975 icode = CODE_FOR_mmx_lshrv4hi3;
20977 case IX86_BUILTIN_PSRLD:
20978 case IX86_BUILTIN_PSRLDI:
20979 icode = CODE_FOR_mmx_lshrv2si3;
20981 case IX86_BUILTIN_PSRLQ:
20982 case IX86_BUILTIN_PSRLQI:
20983 icode = CODE_FOR_mmx_lshrv1di3;
20986 case IX86_BUILTIN_PSLLW128:
20987 case IX86_BUILTIN_PSLLWI128:
20988 icode = CODE_FOR_ashlv8hi3;
20990 case IX86_BUILTIN_PSLLD128:
20991 case IX86_BUILTIN_PSLLDI128:
20992 icode = CODE_FOR_ashlv4si3;
20994 case IX86_BUILTIN_PSLLQ128:
20995 case IX86_BUILTIN_PSLLQI128:
20996 icode = CODE_FOR_ashlv2di3;
20998 case IX86_BUILTIN_PSRAW128:
20999 case IX86_BUILTIN_PSRAWI128:
21000 icode = CODE_FOR_ashrv8hi3;
21002 case IX86_BUILTIN_PSRAD128:
21003 case IX86_BUILTIN_PSRADI128:
21004 icode = CODE_FOR_ashrv4si3;
21006 case IX86_BUILTIN_PSRLW128:
21007 case IX86_BUILTIN_PSRLWI128:
21008 icode = CODE_FOR_lshrv8hi3;
21010 case IX86_BUILTIN_PSRLD128:
21011 case IX86_BUILTIN_PSRLDI128:
21012 icode = CODE_FOR_lshrv4si3;
21014 case IX86_BUILTIN_PSRLQ128:
21015 case IX86_BUILTIN_PSRLQI128:
21016 icode = CODE_FOR_lshrv2di3;
21019 arg0 = CALL_EXPR_ARG (exp, 0);
21020 arg1 = CALL_EXPR_ARG (exp, 1);
21021 op0 = expand_normal (arg0);
21022 op1 = expand_normal (arg1);
21024 tmode = insn_data[icode].operand[0].mode;
21025 mode1 = insn_data[icode].operand[1].mode;
21027 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21028 op0 = copy_to_reg (op0);
21030 if (!CONST_INT_P (op1))
21031 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
21033 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
21034 op1 = copy_to_reg (op1);
21036 target = gen_reg_rtx (tmode);
21037 pat = GEN_FCN (icode) (target, op0, op1);
21043 case IX86_BUILTIN_PSLLDQI128:
21044 return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_ashlti3,
21048 case IX86_BUILTIN_PSRLDQI128:
21049 return ix86_expand_binop_imm_builtin (CODE_FOR_sse2_lshrti3,
21053 case IX86_BUILTIN_AESKEYGENASSIST128:
21054 return ix86_expand_binop_imm_builtin (CODE_FOR_aeskeygenassist,
21057 case IX86_BUILTIN_FEMMS:
21058 emit_insn (gen_mmx_femms ());
21061 case IX86_BUILTIN_PAVGUSB:
21062 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
21064 case IX86_BUILTIN_PF2ID:
21065 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
21067 case IX86_BUILTIN_PFACC:
21068 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
21070 case IX86_BUILTIN_PFADD:
21071 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
21073 case IX86_BUILTIN_PFCMPEQ:
21074 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
21076 case IX86_BUILTIN_PFCMPGE:
21077 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
21079 case IX86_BUILTIN_PFCMPGT:
21080 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
21082 case IX86_BUILTIN_PFMAX:
21083 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
21085 case IX86_BUILTIN_PFMIN:
21086 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
21088 case IX86_BUILTIN_PFMUL:
21089 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
21091 case IX86_BUILTIN_PFRCP:
21092 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
21094 case IX86_BUILTIN_PFRCPIT1:
21095 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
21097 case IX86_BUILTIN_PFRCPIT2:
21098 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
21100 case IX86_BUILTIN_PFRSQIT1:
21101 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
21103 case IX86_BUILTIN_PFRSQRT:
21104 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
21106 case IX86_BUILTIN_PFSUB:
21107 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
21109 case IX86_BUILTIN_PFSUBR:
21110 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
21112 case IX86_BUILTIN_PI2FD:
21113 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
21115 case IX86_BUILTIN_PMULHRW:
21116 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
21118 case IX86_BUILTIN_PF2IW:
21119 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
21121 case IX86_BUILTIN_PFNACC:
21122 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
21124 case IX86_BUILTIN_PFPNACC:
21125 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
21127 case IX86_BUILTIN_PI2FW:
21128 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
21130 case IX86_BUILTIN_PSWAPDSI:
21131 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
21133 case IX86_BUILTIN_PSWAPDSF:
21134 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
21136 case IX86_BUILTIN_SQRTSD:
21137 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
21138 case IX86_BUILTIN_LOADUPD:
21139 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
21140 case IX86_BUILTIN_STOREUPD:
21141 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
21143 case IX86_BUILTIN_MFENCE:
21144 emit_insn (gen_sse2_mfence ());
21146 case IX86_BUILTIN_LFENCE:
21147 emit_insn (gen_sse2_lfence ());
21150 case IX86_BUILTIN_CLFLUSH:
21151 arg0 = CALL_EXPR_ARG (exp, 0);
21152 op0 = expand_normal (arg0);
21153 icode = CODE_FOR_sse2_clflush;
21154 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21155 op0 = copy_to_mode_reg (Pmode, op0);
21157 emit_insn (gen_sse2_clflush (op0));
21160 case IX86_BUILTIN_MOVNTPD:
21161 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
21162 case IX86_BUILTIN_MOVNTDQ:
21163 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
21164 case IX86_BUILTIN_MOVNTI:
21165 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
21167 case IX86_BUILTIN_LOADDQU:
21168 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
21169 case IX86_BUILTIN_STOREDQU:
21170 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
21172 case IX86_BUILTIN_MONITOR:
21173 arg0 = CALL_EXPR_ARG (exp, 0);
21174 arg1 = CALL_EXPR_ARG (exp, 1);
21175 arg2 = CALL_EXPR_ARG (exp, 2);
21176 op0 = expand_normal (arg0);
21177 op1 = expand_normal (arg1);
21178 op2 = expand_normal (arg2);
21180 op0 = copy_to_mode_reg (Pmode, op0);
21182 op1 = copy_to_mode_reg (SImode, op1);
21184 op2 = copy_to_mode_reg (SImode, op2);
21186 emit_insn (gen_sse3_monitor (op0, op1, op2));
21188 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21191 case IX86_BUILTIN_MWAIT:
21192 arg0 = CALL_EXPR_ARG (exp, 0);
21193 arg1 = CALL_EXPR_ARG (exp, 1);
21194 op0 = expand_normal (arg0);
21195 op1 = expand_normal (arg1);
21197 op0 = copy_to_mode_reg (SImode, op0);
21199 op1 = copy_to_mode_reg (SImode, op1);
21200 emit_insn (gen_sse3_mwait (op0, op1));
21203 case IX86_BUILTIN_LDDQU:
21204 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
21207 case IX86_BUILTIN_PALIGNR:
21208 case IX86_BUILTIN_PALIGNR128:
21209 if (fcode == IX86_BUILTIN_PALIGNR)
21211 icode = CODE_FOR_ssse3_palignrdi;
21216 icode = CODE_FOR_ssse3_palignrti;
21219 arg0 = CALL_EXPR_ARG (exp, 0);
21220 arg1 = CALL_EXPR_ARG (exp, 1);
21221 arg2 = CALL_EXPR_ARG (exp, 2);
21222 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21223 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21224 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
21225 tmode = insn_data[icode].operand[0].mode;
21226 mode1 = insn_data[icode].operand[1].mode;
21227 mode2 = insn_data[icode].operand[2].mode;
21228 mode3 = insn_data[icode].operand[3].mode;
21230 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21232 op0 = copy_to_reg (op0);
21233 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
21235 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21237 op1 = copy_to_reg (op1);
21238 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
21240 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21242 error ("shift must be an immediate");
21245 target = gen_reg_rtx (mode);
21246 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
21253 case IX86_BUILTIN_MOVNTDQA:
21254 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
21257 case IX86_BUILTIN_MOVNTSD:
21258 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
21260 case IX86_BUILTIN_MOVNTSS:
21261 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
21263 case IX86_BUILTIN_INSERTQ:
21264 case IX86_BUILTIN_EXTRQ:
21265 icode = (fcode == IX86_BUILTIN_EXTRQ
21266 ? CODE_FOR_sse4a_extrq
21267 : CODE_FOR_sse4a_insertq);
21268 arg0 = CALL_EXPR_ARG (exp, 0);
21269 arg1 = CALL_EXPR_ARG (exp, 1);
21270 op0 = expand_normal (arg0);
21271 op1 = expand_normal (arg1);
21272 tmode = insn_data[icode].operand[0].mode;
21273 mode1 = insn_data[icode].operand[1].mode;
21274 mode2 = insn_data[icode].operand[2].mode;
21275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21276 op0 = copy_to_mode_reg (mode1, op0);
21277 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21278 op1 = copy_to_mode_reg (mode2, op1);
21279 if (optimize || target == 0
21280 || GET_MODE (target) != tmode
21281 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21282 target = gen_reg_rtx (tmode);
21283 pat = GEN_FCN (icode) (target, op0, op1);
21289 case IX86_BUILTIN_EXTRQI:
21290 icode = CODE_FOR_sse4a_extrqi;
21291 arg0 = CALL_EXPR_ARG (exp, 0);
21292 arg1 = CALL_EXPR_ARG (exp, 1);
21293 arg2 = CALL_EXPR_ARG (exp, 2);
21294 op0 = expand_normal (arg0);
21295 op1 = expand_normal (arg1);
21296 op2 = expand_normal (arg2);
21297 tmode = insn_data[icode].operand[0].mode;
21298 mode1 = insn_data[icode].operand[1].mode;
21299 mode2 = insn_data[icode].operand[2].mode;
21300 mode3 = insn_data[icode].operand[3].mode;
21301 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21302 op0 = copy_to_mode_reg (mode1, op0);
21303 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21305 error ("index mask must be an immediate");
21306 return gen_reg_rtx (tmode);
21308 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21310 error ("length mask must be an immediate");
21311 return gen_reg_rtx (tmode);
21313 if (optimize || target == 0
21314 || GET_MODE (target) != tmode
21315 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21316 target = gen_reg_rtx (tmode);
21317 pat = GEN_FCN (icode) (target, op0, op1, op2);
21323 case IX86_BUILTIN_INSERTQI:
21324 icode = CODE_FOR_sse4a_insertqi;
21325 arg0 = CALL_EXPR_ARG (exp, 0);
21326 arg1 = CALL_EXPR_ARG (exp, 1);
21327 arg2 = CALL_EXPR_ARG (exp, 2);
21328 arg3 = CALL_EXPR_ARG (exp, 3);
21329 op0 = expand_normal (arg0);
21330 op1 = expand_normal (arg1);
21331 op2 = expand_normal (arg2);
21332 op3 = expand_normal (arg3);
21333 tmode = insn_data[icode].operand[0].mode;
21334 mode1 = insn_data[icode].operand[1].mode;
21335 mode2 = insn_data[icode].operand[2].mode;
21336 mode3 = insn_data[icode].operand[3].mode;
21337 mode4 = insn_data[icode].operand[4].mode;
21339 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
21340 op0 = copy_to_mode_reg (mode1, op0);
21342 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
21343 op1 = copy_to_mode_reg (mode2, op1);
21345 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
21347 error ("index mask must be an immediate");
21348 return gen_reg_rtx (tmode);
21350 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
21352 error ("length mask must be an immediate");
21353 return gen_reg_rtx (tmode);
21355 if (optimize || target == 0
21356 || GET_MODE (target) != tmode
21357 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
21358 target = gen_reg_rtx (tmode);
21359 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
21365 case IX86_BUILTIN_VEC_INIT_V2SI:
21366 case IX86_BUILTIN_VEC_INIT_V4HI:
21367 case IX86_BUILTIN_VEC_INIT_V8QI:
21368 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21370 case IX86_BUILTIN_VEC_EXT_V2DF:
21371 case IX86_BUILTIN_VEC_EXT_V2DI:
21372 case IX86_BUILTIN_VEC_EXT_V4SF:
21373 case IX86_BUILTIN_VEC_EXT_V4SI:
21374 case IX86_BUILTIN_VEC_EXT_V8HI:
21375 case IX86_BUILTIN_VEC_EXT_V2SI:
21376 case IX86_BUILTIN_VEC_EXT_V4HI:
21377 case IX86_BUILTIN_VEC_EXT_V16QI:
21378 return ix86_expand_vec_ext_builtin (exp, target);
21380 case IX86_BUILTIN_VEC_SET_V2DI:
21381 case IX86_BUILTIN_VEC_SET_V4SF:
21382 case IX86_BUILTIN_VEC_SET_V4SI:
21383 case IX86_BUILTIN_VEC_SET_V8HI:
21384 case IX86_BUILTIN_VEC_SET_V4HI:
21385 case IX86_BUILTIN_VEC_SET_V16QI:
21386 return ix86_expand_vec_set_builtin (exp);
21388 case IX86_BUILTIN_INFQ:
21390 REAL_VALUE_TYPE inf;
21394 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21396 tmp = validize_mem (force_const_mem (mode, tmp));
21399 target = gen_reg_rtx (mode);
21401 emit_move_insn (target, tmp);
21405 case IX86_BUILTIN_FABSQ:
21406 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
21408 case IX86_BUILTIN_COPYSIGNQ:
21409 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
21415 for (i = 0, d = bdesc_sse_3arg;
21416 i < ARRAY_SIZE (bdesc_sse_3arg);
21418 if (d->code == fcode)
21419 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
21422 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
21423 if (d->code == fcode)
21425 /* Compares are treated specially. */
21426 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
21427 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
21428 || d->icode == CODE_FOR_sse2_maskcmpv2df3
21429 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
21430 return ix86_expand_sse_compare (d, exp, target);
21432 return ix86_expand_binop_builtin (d->icode, exp, target);
21435 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
21436 if (d->code == fcode)
21437 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
21439 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21440 if (d->code == fcode)
21441 return ix86_expand_sse_comi (d, exp, target);
21443 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
21444 if (d->code == fcode)
21445 return ix86_expand_sse_ptest (d, exp, target);
21447 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
21448 if (d->code == fcode)
21449 return ix86_expand_crc32 (d->icode, exp, target);
21451 for (i = 0, d = bdesc_pcmpestr;
21452 i < ARRAY_SIZE (bdesc_pcmpestr);
21454 if (d->code == fcode)
21455 return ix86_expand_sse_pcmpestr (d, exp, target);
21457 for (i = 0, d = bdesc_pcmpistr;
21458 i < ARRAY_SIZE (bdesc_pcmpistr);
21460 if (d->code == fcode)
21461 return ix86_expand_sse_pcmpistr (d, exp, target);
21463 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21464 if (d->code == fcode)
21465 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21466 (enum multi_arg_type)d->flag,
21469 gcc_unreachable ();
21472 /* Returns a function decl for a vectorized version of the builtin function
21473 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21474 if it is not available. */
21477 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21480 enum machine_mode in_mode, out_mode;
21483 if (TREE_CODE (type_out) != VECTOR_TYPE
21484 || TREE_CODE (type_in) != VECTOR_TYPE)
21487 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21488 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21489 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21490 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21494 case BUILT_IN_SQRT:
21495 if (out_mode == DFmode && out_n == 2
21496 && in_mode == DFmode && in_n == 2)
21497 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21500 case BUILT_IN_SQRTF:
21501 if (out_mode == SFmode && out_n == 4
21502 && in_mode == SFmode && in_n == 4)
21503 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21506 case BUILT_IN_LRINT:
21507 if (out_mode == SImode && out_n == 4
21508 && in_mode == DFmode && in_n == 2)
21509 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21512 case BUILT_IN_LRINTF:
21513 if (out_mode == SImode && out_n == 4
21514 && in_mode == SFmode && in_n == 4)
21515 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21522 /* Dispatch to a handler for a vectorization library. */
21523 if (ix86_veclib_handler)
21524 return (*ix86_veclib_handler)(fn, type_out, type_in);
21529 /* Handler for an SVML-style interface to
21530 a library with vectorized intrinsics. */
21533 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21536 tree fntype, new_fndecl, args;
21539 enum machine_mode el_mode, in_mode;
21542 /* The SVML is suitable for unsafe math only. */
21543 if (!flag_unsafe_math_optimizations)
21546 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21547 n = TYPE_VECTOR_SUBPARTS (type_out);
21548 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21549 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21550 if (el_mode != in_mode
21558 case BUILT_IN_LOG10:
21560 case BUILT_IN_TANH:
21562 case BUILT_IN_ATAN:
21563 case BUILT_IN_ATAN2:
21564 case BUILT_IN_ATANH:
21565 case BUILT_IN_CBRT:
21566 case BUILT_IN_SINH:
21568 case BUILT_IN_ASINH:
21569 case BUILT_IN_ASIN:
21570 case BUILT_IN_COSH:
21572 case BUILT_IN_ACOSH:
21573 case BUILT_IN_ACOS:
21574 if (el_mode != DFmode || n != 2)
21578 case BUILT_IN_EXPF:
21579 case BUILT_IN_LOGF:
21580 case BUILT_IN_LOG10F:
21581 case BUILT_IN_POWF:
21582 case BUILT_IN_TANHF:
21583 case BUILT_IN_TANF:
21584 case BUILT_IN_ATANF:
21585 case BUILT_IN_ATAN2F:
21586 case BUILT_IN_ATANHF:
21587 case BUILT_IN_CBRTF:
21588 case BUILT_IN_SINHF:
21589 case BUILT_IN_SINF:
21590 case BUILT_IN_ASINHF:
21591 case BUILT_IN_ASINF:
21592 case BUILT_IN_COSHF:
21593 case BUILT_IN_COSF:
21594 case BUILT_IN_ACOSHF:
21595 case BUILT_IN_ACOSF:
21596 if (el_mode != SFmode || n != 4)
21604 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21606 if (fn == BUILT_IN_LOGF)
21607 strcpy (name, "vmlsLn4");
21608 else if (fn == BUILT_IN_LOG)
21609 strcpy (name, "vmldLn2");
21612 sprintf (name, "vmls%s", bname+10);
21613 name[strlen (name)-1] = '4';
21616 sprintf (name, "vmld%s2", bname+10);
21618 /* Convert to uppercase. */
21622 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21623 args = TREE_CHAIN (args))
21627 fntype = build_function_type_list (type_out, type_in, NULL);
21629 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21631 /* Build a function declaration for the vectorized function. */
21632 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21633 TREE_PUBLIC (new_fndecl) = 1;
21634 DECL_EXTERNAL (new_fndecl) = 1;
21635 DECL_IS_NOVOPS (new_fndecl) = 1;
21636 TREE_READONLY (new_fndecl) = 1;
21641 /* Handler for an ACML-style interface to
21642 a library with vectorized intrinsics. */
21645 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21647 char name[20] = "__vr.._";
21648 tree fntype, new_fndecl, args;
21651 enum machine_mode el_mode, in_mode;
21654 /* The ACML is 64bits only and suitable for unsafe math only as
21655 it does not correctly support parts of IEEE with the required
21656 precision such as denormals. */
21658 || !flag_unsafe_math_optimizations)
21661 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21662 n = TYPE_VECTOR_SUBPARTS (type_out);
21663 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21664 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21665 if (el_mode != in_mode
21675 case BUILT_IN_LOG2:
21676 case BUILT_IN_LOG10:
21679 if (el_mode != DFmode
21684 case BUILT_IN_SINF:
21685 case BUILT_IN_COSF:
21686 case BUILT_IN_EXPF:
21687 case BUILT_IN_POWF:
21688 case BUILT_IN_LOGF:
21689 case BUILT_IN_LOG2F:
21690 case BUILT_IN_LOG10F:
21693 if (el_mode != SFmode
21702 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21703 sprintf (name + 7, "%s", bname+10);
21706 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21707 args = TREE_CHAIN (args))
21711 fntype = build_function_type_list (type_out, type_in, NULL);
21713 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21715 /* Build a function declaration for the vectorized function. */
21716 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21717 TREE_PUBLIC (new_fndecl) = 1;
21718 DECL_EXTERNAL (new_fndecl) = 1;
21719 DECL_IS_NOVOPS (new_fndecl) = 1;
21720 TREE_READONLY (new_fndecl) = 1;
21726 /* Returns a decl of a function that implements conversion of the
21727 input vector of type TYPE, or NULL_TREE if it is not available. */
21730 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21732 if (TREE_CODE (type) != VECTOR_TYPE)
21738 switch (TYPE_MODE (type))
21741 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
21746 case FIX_TRUNC_EXPR:
21747 switch (TYPE_MODE (type))
21750 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21760 /* Returns a code for a target-specific builtin that implements
21761 reciprocal of the function, or NULL_TREE if not available. */
21764 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21765 bool sqrt ATTRIBUTE_UNUSED)
21767 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21768 && flag_finite_math_only && !flag_trapping_math
21769 && flag_unsafe_math_optimizations))
21773 /* Machine dependent builtins. */
21776 /* Vectorized version of sqrt to rsqrt conversion. */
21777 case IX86_BUILTIN_SQRTPS_NR:
21778 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21784 /* Normal builtins. */
21787 /* Sqrt to rsqrt conversion. */
21788 case BUILT_IN_SQRTF:
21789 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21796 /* Store OPERAND to the memory after reload is completed. This means
21797 that we can't easily use assign_stack_local. */
21799 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21803 gcc_assert (reload_completed);
21804 if (TARGET_RED_ZONE)
21806 result = gen_rtx_MEM (mode,
21807 gen_rtx_PLUS (Pmode,
21809 GEN_INT (-RED_ZONE_SIZE)));
21810 emit_move_insn (result, operand);
21812 else if (!TARGET_RED_ZONE && TARGET_64BIT)
21818 operand = gen_lowpart (DImode, operand);
21822 gen_rtx_SET (VOIDmode,
21823 gen_rtx_MEM (DImode,
21824 gen_rtx_PRE_DEC (DImode,
21825 stack_pointer_rtx)),
21829 gcc_unreachable ();
21831 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21840 split_di (&operand, 1, operands, operands + 1);
21842 gen_rtx_SET (VOIDmode,
21843 gen_rtx_MEM (SImode,
21844 gen_rtx_PRE_DEC (Pmode,
21845 stack_pointer_rtx)),
21848 gen_rtx_SET (VOIDmode,
21849 gen_rtx_MEM (SImode,
21850 gen_rtx_PRE_DEC (Pmode,
21851 stack_pointer_rtx)),
21856 /* Store HImodes as SImodes. */
21857 operand = gen_lowpart (SImode, operand);
21861 gen_rtx_SET (VOIDmode,
21862 gen_rtx_MEM (GET_MODE (operand),
21863 gen_rtx_PRE_DEC (SImode,
21864 stack_pointer_rtx)),
21868 gcc_unreachable ();
21870 result = gen_rtx_MEM (mode, stack_pointer_rtx);
21875 /* Free operand from the memory. */
21877 ix86_free_from_memory (enum machine_mode mode)
21879 if (!TARGET_RED_ZONE)
21883 if (mode == DImode || TARGET_64BIT)
21887 /* Use LEA to deallocate stack space. In peephole2 it will be converted
21888 to pop or add instruction if registers are available. */
21889 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
21890 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
21895 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
21896 QImode must go into class Q_REGS.
21897 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
21898 movdf to do mem-to-mem moves through integer regs. */
21900 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
21902 enum machine_mode mode = GET_MODE (x);
21904 /* We're only allowed to return a subclass of CLASS. Many of the
21905 following checks fail for NO_REGS, so eliminate that early. */
21906 if (regclass == NO_REGS)
21909 /* All classes can load zeros. */
21910 if (x == CONST0_RTX (mode))
21913 /* Force constants into memory if we are loading a (nonzero) constant into
21914 an MMX or SSE register. This is because there are no MMX/SSE instructions
21915 to load from a constant. */
21917 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
21920 /* Prefer SSE regs only, if we can use them for math. */
21921 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
21922 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
21924 /* Floating-point constants need more complex checks. */
21925 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
21927 /* General regs can load everything. */
21928 if (reg_class_subset_p (regclass, GENERAL_REGS))
21931 /* Floats can load 0 and 1 plus some others. Note that we eliminated
21932 zero above. We only want to wind up preferring 80387 registers if
21933 we plan on doing computation with them. */
21935 && standard_80387_constant_p (x))
21937 /* Limit class to non-sse. */
21938 if (regclass == FLOAT_SSE_REGS)
21940 if (regclass == FP_TOP_SSE_REGS)
21942 if (regclass == FP_SECOND_SSE_REGS)
21943 return FP_SECOND_REG;
21944 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
21951 /* Generally when we see PLUS here, it's the function invariant
21952 (plus soft-fp const_int). Which can only be computed into general
21954 if (GET_CODE (x) == PLUS)
21955 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
21957 /* QImode constants are easy to load, but non-constant QImode data
21958 must go into Q_REGS. */
21959 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
21961 if (reg_class_subset_p (regclass, Q_REGS))
21963 if (reg_class_subset_p (Q_REGS, regclass))
21971 /* Discourage putting floating-point values in SSE registers unless
21972 SSE math is being used, and likewise for the 387 registers. */
21974 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
21976 enum machine_mode mode = GET_MODE (x);
21978 /* Restrict the output reload class to the register bank that we are doing
21979 math on. If we would like not to return a subset of CLASS, reject this
21980 alternative: if reload cannot do this, it will still use its choice. */
21981 mode = GET_MODE (x);
21982 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21983 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
21985 if (X87_FLOAT_MODE_P (mode))
21987 if (regclass == FP_TOP_SSE_REGS)
21989 else if (regclass == FP_SECOND_SSE_REGS)
21990 return FP_SECOND_REG;
21992 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
21998 /* If we are copying between general and FP registers, we need a memory
21999 location. The same is true for SSE and MMX registers.
22001 To optimize register_move_cost performance, allow inline variant.
22003 The macro can't work reliably when one of the CLASSES is class containing
22004 registers from multiple units (SSE, MMX, integer). We avoid this by never
22005 combining those units in single alternative in the machine description.
22006 Ensure that this constraint holds to avoid unexpected surprises.
22008 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22009 enforce these sanity checks. */
22012 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22013 enum machine_mode mode, int strict)
22015 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22016 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22017 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22018 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22019 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22020 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22022 gcc_assert (!strict);
22026 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22029 /* ??? This is a lie. We do have moves between mmx/general, and for
22030 mmx/sse2. But by saying we need secondary memory we discourage the
22031 register allocator from using the mmx registers unless needed. */
22032 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22035 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22037 /* SSE1 doesn't have any direct moves from other classes. */
22041 /* If the target says that inter-unit moves are more expensive
22042 than moving through memory, then don't generate them. */
22043 if (!TARGET_INTER_UNIT_MOVES)
22046 /* Between SSE and general, we have moves no larger than word size. */
22047 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
22055 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22056 enum machine_mode mode, int strict)
22058 return inline_secondary_memory_needed (class1, class2, mode, strict);
22061 /* Return true if the registers in CLASS cannot represent the change from
22062 modes FROM to TO. */
22065 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22066 enum reg_class regclass)
22071 /* x87 registers can't do subreg at all, as all values are reformatted
22072 to extended precision. */
22073 if (MAYBE_FLOAT_CLASS_P (regclass))
22076 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22078 /* Vector registers do not support QI or HImode loads. If we don't
22079 disallow a change to these modes, reload will assume it's ok to
22080 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22081 the vec_dupv4hi pattern. */
22082 if (GET_MODE_SIZE (from) < 4)
22085 /* Vector registers do not support subreg with nonzero offsets, which
22086 are otherwise valid for integer registers. Since we can't see
22087 whether we have a nonzero offset from here, prohibit all
22088 nonparadoxical subregs changing size. */
22089 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22096 /* Return the cost of moving data of mode M between a
22097 register and memory. A value of 2 is the default; this cost is
22098 relative to those in `REGISTER_MOVE_COST'.
22100 This function is used extensively by register_move_cost that is used to
22101 build tables at startup. Make it inline in this case.
22102 When IN is 2, return maximum of in and out move cost.
22104 If moving between registers and memory is more expensive than
22105 between two registers, you should define this macro to express the
22108 Model also increased moving costs of QImode registers in non
22112 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
22116 if (FLOAT_CLASS_P (regclass))
22134 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22135 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
22137 if (SSE_CLASS_P (regclass))
22140 switch (GET_MODE_SIZE (mode))
22155 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22156 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
22158 if (MMX_CLASS_P (regclass))
22161 switch (GET_MODE_SIZE (mode))
22173 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22174 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
22176 switch (GET_MODE_SIZE (mode))
22179 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22182 return ix86_cost->int_store[0];
22183 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22184 cost = ix86_cost->movzbl_load;
22186 cost = ix86_cost->int_load[0];
22188 return MAX (cost, ix86_cost->int_store[0]);
22194 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22196 return ix86_cost->movzbl_load;
22198 return ix86_cost->int_store[0] + 4;
22203 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22204 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22206 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22207 if (mode == TFmode)
22210 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22212 cost = ix86_cost->int_load[2];
22214 cost = ix86_cost->int_store[2];
22215 return (cost * (((int) GET_MODE_SIZE (mode)
22216 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
22221 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22223 return inline_memory_move_cost (mode, regclass, in);
22227 /* Return the cost of moving data from a register in class CLASS1 to
22228 one in class CLASS2.
22230 It is not required that the cost always equal 2 when FROM is the same as TO;
22231 on some machines it is expensive to move between registers if they are not
22232 general registers. */
22235 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22236 enum reg_class class2)
22238 /* In case we require secondary memory, compute cost of the store followed
22239 by load. In order to avoid bad register allocation choices, we need
22240 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22242 if (inline_secondary_memory_needed (class1, class2, mode, 0))
22246 cost += inline_memory_move_cost (mode, class1, 2);
22247 cost += inline_memory_move_cost (mode, class2, 2);
22249 /* In case of copying from general_purpose_register we may emit multiple
22250 stores followed by single load causing memory size mismatch stall.
22251 Count this as arbitrarily high cost of 20. */
22252 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22255 /* In the case of FP/MMX moves, the registers actually overlap, and we
22256 have to switch modes in order to treat them differently. */
22257 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22258 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22264 /* Moves between SSE/MMX and integer unit are expensive. */
22265 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22266 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22268 /* ??? By keeping returned value relatively high, we limit the number
22269 of moves between integer and MMX/SSE registers for all targets.
22270 Additionally, high value prevents problem with x86_modes_tieable_p(),
22271 where integer modes in MMX/SSE registers are not tieable
22272 because of missing QImode and HImode moves to, from or between
22273 MMX/SSE registers. */
22274 return MAX (8, ix86_cost->mmxsse_to_integer);
22276 if (MAYBE_FLOAT_CLASS_P (class1))
22277 return ix86_cost->fp_move;
22278 if (MAYBE_SSE_CLASS_P (class1))
22279 return ix86_cost->sse_move;
22280 if (MAYBE_MMX_CLASS_P (class1))
22281 return ix86_cost->mmx_move;
22285 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22288 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22290 /* Flags and only flags can only hold CCmode values. */
22291 if (CC_REGNO_P (regno))
22292 return GET_MODE_CLASS (mode) == MODE_CC;
22293 if (GET_MODE_CLASS (mode) == MODE_CC
22294 || GET_MODE_CLASS (mode) == MODE_RANDOM
22295 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22297 if (FP_REGNO_P (regno))
22298 return VALID_FP_MODE_P (mode);
22299 if (SSE_REGNO_P (regno))
22301 /* We implement the move patterns for all vector modes into and
22302 out of SSE registers, even when no operation instructions
22304 return (VALID_SSE_REG_MODE (mode)
22305 || VALID_SSE2_REG_MODE (mode)
22306 || VALID_MMX_REG_MODE (mode)
22307 || VALID_MMX_REG_MODE_3DNOW (mode));
22309 if (MMX_REGNO_P (regno))
22311 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22312 so if the register is available at all, then we can move data of
22313 the given mode into or out of it. */
22314 return (VALID_MMX_REG_MODE (mode)
22315 || VALID_MMX_REG_MODE_3DNOW (mode));
22318 if (mode == QImode)
22320 /* Take care for QImode values - they can be in non-QI regs,
22321 but then they do cause partial register stalls. */
22322 if (regno < 4 || TARGET_64BIT)
22324 if (!TARGET_PARTIAL_REG_STALL)
22326 return reload_in_progress || reload_completed;
22328 /* We handle both integer and floats in the general purpose registers. */
22329 else if (VALID_INT_MODE_P (mode))
22331 else if (VALID_FP_MODE_P (mode))
22333 else if (VALID_DFP_MODE_P (mode))
22335 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22336 on to use that value in smaller contexts, this can easily force a
22337 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22338 supporting DImode, allow it. */
22339 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22345 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22346 tieable integer mode. */
22349 ix86_tieable_integer_mode_p (enum machine_mode mode)
22358 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22361 return TARGET_64BIT;
22368 /* Return true if MODE1 is accessible in a register that can hold MODE2
22369 without copying. That is, all register classes that can hold MODE2
22370 can also hold MODE1. */
22373 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22375 if (mode1 == mode2)
22378 if (ix86_tieable_integer_mode_p (mode1)
22379 && ix86_tieable_integer_mode_p (mode2))
22382 /* MODE2 being XFmode implies fp stack or general regs, which means we
22383 can tie any smaller floating point modes to it. Note that we do not
22384 tie this with TFmode. */
22385 if (mode2 == XFmode)
22386 return mode1 == SFmode || mode1 == DFmode;
22388 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22389 that we can tie it with SFmode. */
22390 if (mode2 == DFmode)
22391 return mode1 == SFmode;
22393 /* If MODE2 is only appropriate for an SSE register, then tie with
22394 any other mode acceptable to SSE registers. */
22395 if (GET_MODE_SIZE (mode2) == 16
22396 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22397 return (GET_MODE_SIZE (mode1) == 16
22398 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22400 /* If MODE2 is appropriate for an MMX register, then tie
22401 with any other mode acceptable to MMX registers. */
22402 if (GET_MODE_SIZE (mode2) == 8
22403 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22404 return (GET_MODE_SIZE (mode1) == 8
22405 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22410 /* Compute a (partial) cost for rtx X. Return true if the complete
22411 cost has been computed, and false if subexpressions should be
22412 scanned. In either case, *TOTAL contains the cost result. */
22415 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22417 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22418 enum machine_mode mode = GET_MODE (x);
22426 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22428 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22430 else if (flag_pic && SYMBOLIC_CONST (x)
22432 || (!GET_CODE (x) != LABEL_REF
22433 && (GET_CODE (x) != SYMBOL_REF
22434 || !SYMBOL_REF_LOCAL_P (x)))))
22441 if (mode == VOIDmode)
22444 switch (standard_80387_constant_p (x))
22449 default: /* Other constants */
22454 /* Start with (MEM (SYMBOL_REF)), since that's where
22455 it'll probably end up. Add a penalty for size. */
22456 *total = (COSTS_N_INSNS (1)
22457 + (flag_pic != 0 && !TARGET_64BIT)
22458 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22464 /* The zero extensions is often completely free on x86_64, so make
22465 it as cheap as possible. */
22466 if (TARGET_64BIT && mode == DImode
22467 && GET_MODE (XEXP (x, 0)) == SImode)
22469 else if (TARGET_ZERO_EXTEND_WITH_AND)
22470 *total = ix86_cost->add;
22472 *total = ix86_cost->movzx;
22476 *total = ix86_cost->movsx;
22480 if (CONST_INT_P (XEXP (x, 1))
22481 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22483 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22486 *total = ix86_cost->add;
22489 if ((value == 2 || value == 3)
22490 && ix86_cost->lea <= ix86_cost->shift_const)
22492 *total = ix86_cost->lea;
22502 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22504 if (CONST_INT_P (XEXP (x, 1)))
22506 if (INTVAL (XEXP (x, 1)) > 32)
22507 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22509 *total = ix86_cost->shift_const * 2;
22513 if (GET_CODE (XEXP (x, 1)) == AND)
22514 *total = ix86_cost->shift_var * 2;
22516 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22521 if (CONST_INT_P (XEXP (x, 1)))
22522 *total = ix86_cost->shift_const;
22524 *total = ix86_cost->shift_var;
22529 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22531 /* ??? SSE scalar cost should be used here. */
22532 *total = ix86_cost->fmul;
22535 else if (X87_FLOAT_MODE_P (mode))
22537 *total = ix86_cost->fmul;
22540 else if (FLOAT_MODE_P (mode))
22542 /* ??? SSE vector cost should be used here. */
22543 *total = ix86_cost->fmul;
22548 rtx op0 = XEXP (x, 0);
22549 rtx op1 = XEXP (x, 1);
22551 if (CONST_INT_P (XEXP (x, 1)))
22553 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22554 for (nbits = 0; value != 0; value &= value - 1)
22558 /* This is arbitrary. */
22561 /* Compute costs correctly for widening multiplication. */
22562 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22563 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22564 == GET_MODE_SIZE (mode))
22566 int is_mulwiden = 0;
22567 enum machine_mode inner_mode = GET_MODE (op0);
22569 if (GET_CODE (op0) == GET_CODE (op1))
22570 is_mulwiden = 1, op1 = XEXP (op1, 0);
22571 else if (CONST_INT_P (op1))
22573 if (GET_CODE (op0) == SIGN_EXTEND)
22574 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22577 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22581 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22584 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22585 + nbits * ix86_cost->mult_bit
22586 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
22595 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22596 /* ??? SSE cost should be used here. */
22597 *total = ix86_cost->fdiv;
22598 else if (X87_FLOAT_MODE_P (mode))
22599 *total = ix86_cost->fdiv;
22600 else if (FLOAT_MODE_P (mode))
22601 /* ??? SSE vector cost should be used here. */
22602 *total = ix86_cost->fdiv;
22604 *total = ix86_cost->divide[MODE_INDEX (mode)];
22608 if (GET_MODE_CLASS (mode) == MODE_INT
22609 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22611 if (GET_CODE (XEXP (x, 0)) == PLUS
22612 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22613 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22614 && CONSTANT_P (XEXP (x, 1)))
22616 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22617 if (val == 2 || val == 4 || val == 8)
22619 *total = ix86_cost->lea;
22620 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22621 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22623 *total += rtx_cost (XEXP (x, 1), outer_code);
22627 else if (GET_CODE (XEXP (x, 0)) == MULT
22628 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22630 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22631 if (val == 2 || val == 4 || val == 8)
22633 *total = ix86_cost->lea;
22634 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22635 *total += rtx_cost (XEXP (x, 1), outer_code);
22639 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22641 *total = ix86_cost->lea;
22642 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22643 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22644 *total += rtx_cost (XEXP (x, 1), outer_code);
22651 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22653 /* ??? SSE cost should be used here. */
22654 *total = ix86_cost->fadd;
22657 else if (X87_FLOAT_MODE_P (mode))
22659 *total = ix86_cost->fadd;
22662 else if (FLOAT_MODE_P (mode))
22664 /* ??? SSE vector cost should be used here. */
22665 *total = ix86_cost->fadd;
22673 if (!TARGET_64BIT && mode == DImode)
22675 *total = (ix86_cost->add * 2
22676 + (rtx_cost (XEXP (x, 0), outer_code)
22677 << (GET_MODE (XEXP (x, 0)) != DImode))
22678 + (rtx_cost (XEXP (x, 1), outer_code)
22679 << (GET_MODE (XEXP (x, 1)) != DImode)));
22685 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22687 /* ??? SSE cost should be used here. */
22688 *total = ix86_cost->fchs;
22691 else if (X87_FLOAT_MODE_P (mode))
22693 *total = ix86_cost->fchs;
22696 else if (FLOAT_MODE_P (mode))
22698 /* ??? SSE vector cost should be used here. */
22699 *total = ix86_cost->fchs;
22705 if (!TARGET_64BIT && mode == DImode)
22706 *total = ix86_cost->add * 2;
22708 *total = ix86_cost->add;
22712 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22713 && XEXP (XEXP (x, 0), 1) == const1_rtx
22714 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22715 && XEXP (x, 1) == const0_rtx)
22717 /* This kind of construct is implemented using test[bwl].
22718 Treat it as if we had an AND. */
22719 *total = (ix86_cost->add
22720 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22721 + rtx_cost (const1_rtx, outer_code));
22727 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
22732 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22733 /* ??? SSE cost should be used here. */
22734 *total = ix86_cost->fabs;
22735 else if (X87_FLOAT_MODE_P (mode))
22736 *total = ix86_cost->fabs;
22737 else if (FLOAT_MODE_P (mode))
22738 /* ??? SSE vector cost should be used here. */
22739 *total = ix86_cost->fabs;
22743 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22744 /* ??? SSE cost should be used here. */
22745 *total = ix86_cost->fsqrt;
22746 else if (X87_FLOAT_MODE_P (mode))
22747 *total = ix86_cost->fsqrt;
22748 else if (FLOAT_MODE_P (mode))
22749 /* ??? SSE vector cost should be used here. */
22750 *total = ix86_cost->fsqrt;
22754 if (XINT (x, 1) == UNSPEC_TP)
22765 static int current_machopic_label_num;
22767 /* Given a symbol name and its associated stub, write out the
22768 definition of the stub. */
22771 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22773 unsigned int length;
22774 char *binder_name, *symbol_name, lazy_ptr_name[32];
22775 int label = ++current_machopic_label_num;
22777 /* For 64-bit we shouldn't get here. */
22778 gcc_assert (!TARGET_64BIT);
22780 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22781 symb = (*targetm.strip_name_encoding) (symb);
22783 length = strlen (stub);
22784 binder_name = alloca (length + 32);
22785 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22787 length = strlen (symb);
22788 symbol_name = alloca (length + 32);
22789 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22791 sprintf (lazy_ptr_name, "L%d$lz", label);
22794 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22796 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
22798 fprintf (file, "%s:\n", stub);
22799 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22803 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
22804 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
22805 fprintf (file, "\tjmp\t*%%edx\n");
22808 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
22810 fprintf (file, "%s:\n", binder_name);
22814 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
22815 fprintf (file, "\tpushl\t%%eax\n");
22818 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
22820 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
22822 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
22823 fprintf (file, "%s:\n", lazy_ptr_name);
22824 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
22825 fprintf (file, "\t.long %s\n", binder_name);
22829 darwin_x86_file_end (void)
22831 darwin_file_end ();
22834 #endif /* TARGET_MACHO */
22836 /* Order the registers for register allocator. */
22839 x86_order_regs_for_local_alloc (void)
22844 /* First allocate the local general purpose registers. */
22845 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22846 if (GENERAL_REGNO_P (i) && call_used_regs[i])
22847 reg_alloc_order [pos++] = i;
22849 /* Global general purpose registers. */
22850 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
22851 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
22852 reg_alloc_order [pos++] = i;
22854 /* x87 registers come first in case we are doing FP math
22856 if (!TARGET_SSE_MATH)
22857 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22858 reg_alloc_order [pos++] = i;
22860 /* SSE registers. */
22861 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
22862 reg_alloc_order [pos++] = i;
22863 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
22864 reg_alloc_order [pos++] = i;
22866 /* x87 registers. */
22867 if (TARGET_SSE_MATH)
22868 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
22869 reg_alloc_order [pos++] = i;
22871 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
22872 reg_alloc_order [pos++] = i;
22874 /* Initialize the rest of array as we do not allocate some registers
22876 while (pos < FIRST_PSEUDO_REGISTER)
22877 reg_alloc_order [pos++] = 0;
22880 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
22881 struct attribute_spec.handler. */
22883 ix86_handle_struct_attribute (tree *node, tree name,
22884 tree args ATTRIBUTE_UNUSED,
22885 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
22888 if (DECL_P (*node))
22890 if (TREE_CODE (*node) == TYPE_DECL)
22891 type = &TREE_TYPE (*node);
22896 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
22897 || TREE_CODE (*type) == UNION_TYPE)))
22899 warning (OPT_Wattributes, "%qs attribute ignored",
22900 IDENTIFIER_POINTER (name));
22901 *no_add_attrs = true;
22904 else if ((is_attribute_p ("ms_struct", name)
22905 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
22906 || ((is_attribute_p ("gcc_struct", name)
22907 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
22909 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
22910 IDENTIFIER_POINTER (name));
22911 *no_add_attrs = true;
22918 ix86_ms_bitfield_layout_p (const_tree record_type)
22920 return (TARGET_MS_BITFIELD_LAYOUT &&
22921 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
22922 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
22925 /* Returns an expression indicating where the this parameter is
22926 located on entry to the FUNCTION. */
22929 x86_this_parameter (tree function)
22931 tree type = TREE_TYPE (function);
22932 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
22937 const int *parm_regs;
22939 if (TARGET_64BIT_MS_ABI)
22940 parm_regs = x86_64_ms_abi_int_parameter_registers;
22942 parm_regs = x86_64_int_parameter_registers;
22943 return gen_rtx_REG (DImode, parm_regs[aggr]);
22946 nregs = ix86_function_regparm (type, function);
22948 if (nregs > 0 && !stdarg_p (type))
22952 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
22953 regno = aggr ? DX_REG : CX_REG;
22961 return gen_rtx_MEM (SImode,
22962 plus_constant (stack_pointer_rtx, 4));
22965 return gen_rtx_REG (SImode, regno);
22968 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
22971 /* Determine whether x86_output_mi_thunk can succeed. */
22974 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
22975 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
22976 HOST_WIDE_INT vcall_offset, const_tree function)
22978 /* 64-bit can handle anything. */
22982 /* For 32-bit, everything's fine if we have one free register. */
22983 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
22986 /* Need a free register for vcall_offset. */
22990 /* Need a free register for GOT references. */
22991 if (flag_pic && !(*targetm.binds_local_p) (function))
22994 /* Otherwise ok. */
22998 /* Output the assembler code for a thunk function. THUNK_DECL is the
22999 declaration for the thunk function itself, FUNCTION is the decl for
23000 the target function. DELTA is an immediate constant offset to be
23001 added to THIS. If VCALL_OFFSET is nonzero, the word at
23002 *(*this + vcall_offset) should be added to THIS. */
23005 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23006 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23007 HOST_WIDE_INT vcall_offset, tree function)
23010 rtx this_param = x86_this_parameter (function);
23013 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23014 pull it in now and let DELTA benefit. */
23015 if (REG_P (this_param))
23016 this_reg = this_param;
23017 else if (vcall_offset)
23019 /* Put the this parameter into %eax. */
23020 xops[0] = this_param;
23021 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23022 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23025 this_reg = NULL_RTX;
23027 /* Adjust the this parameter by a fixed constant. */
23030 xops[0] = GEN_INT (delta);
23031 xops[1] = this_reg ? this_reg : this_param;
23034 if (!x86_64_general_operand (xops[0], DImode))
23036 tmp = gen_rtx_REG (DImode, R10_REG);
23038 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23040 xops[1] = this_param;
23042 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23045 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23048 /* Adjust the this parameter by a value stored in the vtable. */
23052 tmp = gen_rtx_REG (DImode, R10_REG);
23055 int tmp_regno = CX_REG;
23056 if (lookup_attribute ("fastcall",
23057 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23058 tmp_regno = AX_REG;
23059 tmp = gen_rtx_REG (SImode, tmp_regno);
23062 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23065 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23067 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23069 /* Adjust the this parameter. */
23070 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23071 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23073 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23074 xops[0] = GEN_INT (vcall_offset);
23076 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23077 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23079 xops[1] = this_reg;
23081 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23083 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23086 /* If necessary, drop THIS back to its stack slot. */
23087 if (this_reg && this_reg != this_param)
23089 xops[0] = this_reg;
23090 xops[1] = this_param;
23091 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23094 xops[0] = XEXP (DECL_RTL (function), 0);
23097 if (!flag_pic || (*targetm.binds_local_p) (function))
23098 output_asm_insn ("jmp\t%P0", xops);
23099 /* All thunks should be in the same object as their target,
23100 and thus binds_local_p should be true. */
23101 else if (TARGET_64BIT_MS_ABI)
23102 gcc_unreachable ();
23105 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23106 tmp = gen_rtx_CONST (Pmode, tmp);
23107 tmp = gen_rtx_MEM (QImode, tmp);
23109 output_asm_insn ("jmp\t%A0", xops);
23114 if (!flag_pic || (*targetm.binds_local_p) (function))
23115 output_asm_insn ("jmp\t%P0", xops);
23120 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23121 tmp = (gen_rtx_SYMBOL_REF
23123 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23124 tmp = gen_rtx_MEM (QImode, tmp);
23126 output_asm_insn ("jmp\t%0", xops);
23129 #endif /* TARGET_MACHO */
23131 tmp = gen_rtx_REG (SImode, CX_REG);
23132 output_set_got (tmp, NULL_RTX);
23135 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23136 output_asm_insn ("jmp\t{*}%1", xops);
23142 x86_file_start (void)
23144 default_file_start ();
23146 darwin_file_start ();
23148 if (X86_FILE_START_VERSION_DIRECTIVE)
23149 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23150 if (X86_FILE_START_FLTUSED)
23151 fputs ("\t.global\t__fltused\n", asm_out_file);
23152 if (ix86_asm_dialect == ASM_INTEL)
23153 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
23157 x86_field_alignment (tree field, int computed)
23159 enum machine_mode mode;
23160 tree type = TREE_TYPE (field);
23162 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23164 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
23165 ? get_inner_array_type (type) : type);
23166 if (mode == DFmode || mode == DCmode
23167 || GET_MODE_CLASS (mode) == MODE_INT
23168 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23169 return MIN (32, computed);
23173 /* Output assembler code to FILE to increment profiler label # LABELNO
23174 for profiling a function entry. */
23176 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23180 #ifndef NO_PROFILE_COUNTERS
23181 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23184 if (!TARGET_64BIT_MS_ABI && flag_pic)
23185 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23187 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23191 #ifndef NO_PROFILE_COUNTERS
23192 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23193 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23195 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23199 #ifndef NO_PROFILE_COUNTERS
23200 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23201 PROFILE_COUNT_REGISTER);
23203 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23207 /* We don't have exact information about the insn sizes, but we may assume
23208 quite safely that we are informed about all 1 byte insns and memory
23209 address sizes. This is enough to eliminate unnecessary padding in
23213 min_insn_size (rtx insn)
23217 if (!INSN_P (insn) || !active_insn_p (insn))
23220 /* Discard alignments we've emit and jump instructions. */
23221 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23222 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23225 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23226 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23229 /* Important case - calls are always 5 bytes.
23230 It is common to have many calls in the row. */
23232 && symbolic_reference_mentioned_p (PATTERN (insn))
23233 && !SIBLING_CALL_P (insn))
23235 if (get_attr_length (insn) <= 1)
23238 /* For normal instructions we may rely on the sizes of addresses
23239 and the presence of symbol to require 4 bytes of encoding.
23240 This is not the case for jumps where references are PC relative. */
23241 if (!JUMP_P (insn))
23243 l = get_attr_length_address (insn);
23244 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23253 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
23257 ix86_avoid_jump_misspredicts (void)
23259 rtx insn, start = get_insns ();
23260 int nbytes = 0, njumps = 0;
23263 /* Look for all minimal intervals of instructions containing 4 jumps.
23264 The intervals are bounded by START and INSN. NBYTES is the total
23265 size of instructions in the interval including INSN and not including
23266 START. When the NBYTES is smaller than 16 bytes, it is possible
23267 that the end of START and INSN ends up in the same 16byte page.
23269 The smallest offset in the page INSN can start is the case where START
23270 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23271 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23273 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23276 nbytes += min_insn_size (insn);
23278 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23279 INSN_UID (insn), min_insn_size (insn));
23281 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23282 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23290 start = NEXT_INSN (start);
23291 if ((JUMP_P (start)
23292 && GET_CODE (PATTERN (start)) != ADDR_VEC
23293 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23295 njumps--, isjump = 1;
23298 nbytes -= min_insn_size (start);
23300 gcc_assert (njumps >= 0);
23302 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23303 INSN_UID (start), INSN_UID (insn), nbytes);
23305 if (njumps == 3 && isjump && nbytes < 16)
23307 int padsize = 15 - nbytes + min_insn_size (insn);
23310 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23311 INSN_UID (insn), padsize);
23312 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23317 /* AMD Athlon works faster
23318 when RET is not destination of conditional jump or directly preceded
23319 by other jump instruction. We avoid the penalty by inserting NOP just
23320 before the RET instructions in such cases. */
23322 ix86_pad_returns (void)
23327 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23329 basic_block bb = e->src;
23330 rtx ret = BB_END (bb);
23332 bool replace = false;
23334 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23335 || !maybe_hot_bb_p (bb))
23337 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23338 if (active_insn_p (prev) || LABEL_P (prev))
23340 if (prev && LABEL_P (prev))
23345 FOR_EACH_EDGE (e, ei, bb->preds)
23346 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23347 && !(e->flags & EDGE_FALLTHRU))
23352 prev = prev_active_insn (ret);
23354 && ((JUMP_P (prev) && any_condjump_p (prev))
23357 /* Empty functions get branch mispredict even when the jump destination
23358 is not visible to us. */
23359 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23364 emit_insn_before (gen_return_internal_long (), ret);
23370 /* Implement machine specific optimizations. We implement padding of returns
23371 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
23375 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23376 ix86_pad_returns ();
23377 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23378 ix86_avoid_jump_misspredicts ();
23381 /* Return nonzero when QImode register that must be represented via REX prefix
23384 x86_extended_QIreg_mentioned_p (rtx insn)
23387 extract_insn_cached (insn);
23388 for (i = 0; i < recog_data.n_operands; i++)
23389 if (REG_P (recog_data.operand[i])
23390 && REGNO (recog_data.operand[i]) >= 4)
23395 /* Return nonzero when P points to register encoded via REX prefix.
23396 Called via for_each_rtx. */
23398 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23400 unsigned int regno;
23403 regno = REGNO (*p);
23404 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23407 /* Return true when INSN mentions register that must be encoded using REX
23410 x86_extended_reg_mentioned_p (rtx insn)
23412 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23415 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23416 optabs would emit if we didn't have TFmode patterns. */
23419 x86_emit_floatuns (rtx operands[2])
23421 rtx neglab, donelab, i0, i1, f0, in, out;
23422 enum machine_mode mode, inmode;
23424 inmode = GET_MODE (operands[1]);
23425 gcc_assert (inmode == SImode || inmode == DImode);
23428 in = force_reg (inmode, operands[1]);
23429 mode = GET_MODE (out);
23430 neglab = gen_label_rtx ();
23431 donelab = gen_label_rtx ();
23432 f0 = gen_reg_rtx (mode);
23434 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23436 expand_float (out, in, 0);
23438 emit_jump_insn (gen_jump (donelab));
23441 emit_label (neglab);
23443 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23445 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23447 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23449 expand_float (f0, i0, 0);
23451 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23453 emit_label (donelab);
23456 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23457 with all elements equal to VAR. Return true if successful. */
23460 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23461 rtx target, rtx val)
23463 enum machine_mode smode, wsmode, wvmode;
23478 val = force_reg (GET_MODE_INNER (mode), val);
23479 x = gen_rtx_VEC_DUPLICATE (mode, val);
23480 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23486 if (TARGET_SSE || TARGET_3DNOW_A)
23488 val = gen_lowpart (SImode, val);
23489 x = gen_rtx_TRUNCATE (HImode, val);
23490 x = gen_rtx_VEC_DUPLICATE (mode, x);
23491 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23513 /* Extend HImode to SImode using a paradoxical SUBREG. */
23514 tmp1 = gen_reg_rtx (SImode);
23515 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23516 /* Insert the SImode value as low element of V4SImode vector. */
23517 tmp2 = gen_reg_rtx (V4SImode);
23518 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23519 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23520 CONST0_RTX (V4SImode),
23522 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23523 /* Cast the V4SImode vector back to a V8HImode vector. */
23524 tmp1 = gen_reg_rtx (V8HImode);
23525 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23526 /* Duplicate the low short through the whole low SImode word. */
23527 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23528 /* Cast the V8HImode vector back to a V4SImode vector. */
23529 tmp2 = gen_reg_rtx (V4SImode);
23530 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23531 /* Replicate the low element of the V4SImode vector. */
23532 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23533 /* Cast the V2SImode back to V8HImode, and store in target. */
23534 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23545 /* Extend QImode to SImode using a paradoxical SUBREG. */
23546 tmp1 = gen_reg_rtx (SImode);
23547 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23548 /* Insert the SImode value as low element of V4SImode vector. */
23549 tmp2 = gen_reg_rtx (V4SImode);
23550 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23551 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23552 CONST0_RTX (V4SImode),
23554 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23555 /* Cast the V4SImode vector back to a V16QImode vector. */
23556 tmp1 = gen_reg_rtx (V16QImode);
23557 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23558 /* Duplicate the low byte through the whole low SImode word. */
23559 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23560 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23561 /* Cast the V16QImode vector back to a V4SImode vector. */
23562 tmp2 = gen_reg_rtx (V4SImode);
23563 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23564 /* Replicate the low element of the V4SImode vector. */
23565 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23566 /* Cast the V2SImode back to V16QImode, and store in target. */
23567 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23575 /* Replicate the value once into the next wider mode and recurse. */
23576 val = convert_modes (wsmode, smode, val, true);
23577 x = expand_simple_binop (wsmode, ASHIFT, val,
23578 GEN_INT (GET_MODE_BITSIZE (smode)),
23579 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23580 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23582 x = gen_reg_rtx (wvmode);
23583 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23584 gcc_unreachable ();
23585 emit_move_insn (target, gen_lowpart (mode, x));
23593 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23594 whose ONE_VAR element is VAR, and other elements are zero. Return true
23598 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23599 rtx target, rtx var, int one_var)
23601 enum machine_mode vsimode;
23617 var = force_reg (GET_MODE_INNER (mode), var);
23618 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23619 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23624 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23625 new_target = gen_reg_rtx (mode);
23627 new_target = target;
23628 var = force_reg (GET_MODE_INNER (mode), var);
23629 x = gen_rtx_VEC_DUPLICATE (mode, var);
23630 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23631 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23634 /* We need to shuffle the value to the correct position, so
23635 create a new pseudo to store the intermediate result. */
23637 /* With SSE2, we can use the integer shuffle insns. */
23638 if (mode != V4SFmode && TARGET_SSE2)
23640 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23642 GEN_INT (one_var == 1 ? 0 : 1),
23643 GEN_INT (one_var == 2 ? 0 : 1),
23644 GEN_INT (one_var == 3 ? 0 : 1)));
23645 if (target != new_target)
23646 emit_move_insn (target, new_target);
23650 /* Otherwise convert the intermediate result to V4SFmode and
23651 use the SSE1 shuffle instructions. */
23652 if (mode != V4SFmode)
23654 tmp = gen_reg_rtx (V4SFmode);
23655 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23660 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23662 GEN_INT (one_var == 1 ? 0 : 1),
23663 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23664 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23666 if (mode != V4SFmode)
23667 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23668 else if (tmp != target)
23669 emit_move_insn (target, tmp);
23671 else if (target != new_target)
23672 emit_move_insn (target, new_target);
23677 vsimode = V4SImode;
23683 vsimode = V2SImode;
23689 /* Zero extend the variable element to SImode and recurse. */
23690 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23692 x = gen_reg_rtx (vsimode);
23693 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23695 gcc_unreachable ();
23697 emit_move_insn (target, gen_lowpart (mode, x));
23705 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23706 consisting of the values in VALS. It is known that all elements
23707 except ONE_VAR are constants. Return true if successful. */
23710 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23711 rtx target, rtx vals, int one_var)
23713 rtx var = XVECEXP (vals, 0, one_var);
23714 enum machine_mode wmode;
23717 const_vec = copy_rtx (vals);
23718 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23719 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23727 /* For the two element vectors, it's just as easy to use
23728 the general case. */
23744 /* There's no way to set one QImode entry easily. Combine
23745 the variable value with its adjacent constant value, and
23746 promote to an HImode set. */
23747 x = XVECEXP (vals, 0, one_var ^ 1);
23750 var = convert_modes (HImode, QImode, var, true);
23751 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23752 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23753 x = GEN_INT (INTVAL (x) & 0xff);
23757 var = convert_modes (HImode, QImode, var, true);
23758 x = gen_int_mode (INTVAL (x) << 8, HImode);
23760 if (x != const0_rtx)
23761 var = expand_simple_binop (HImode, IOR, var, x, var,
23762 1, OPTAB_LIB_WIDEN);
23764 x = gen_reg_rtx (wmode);
23765 emit_move_insn (x, gen_lowpart (wmode, const_vec));
23766 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
23768 emit_move_insn (target, gen_lowpart (mode, x));
23775 emit_move_insn (target, const_vec);
23776 ix86_expand_vector_set (mmx_ok, target, var, one_var);
23780 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
23781 all values variable, and none identical. */
23784 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
23785 rtx target, rtx vals)
23787 enum machine_mode half_mode = GET_MODE_INNER (mode);
23788 rtx op0 = NULL, op1 = NULL;
23789 bool use_vec_concat = false;
23795 if (!mmx_ok && !TARGET_SSE)
23801 /* For the two element vectors, we always implement VEC_CONCAT. */
23802 op0 = XVECEXP (vals, 0, 0);
23803 op1 = XVECEXP (vals, 0, 1);
23804 use_vec_concat = true;
23808 half_mode = V2SFmode;
23811 half_mode = V2SImode;
23817 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
23818 Recurse to load the two halves. */
23820 op0 = gen_reg_rtx (half_mode);
23821 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
23822 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
23824 op1 = gen_reg_rtx (half_mode);
23825 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
23826 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
23828 use_vec_concat = true;
23839 gcc_unreachable ();
23842 if (use_vec_concat)
23844 if (!register_operand (op0, half_mode))
23845 op0 = force_reg (half_mode, op0);
23846 if (!register_operand (op1, half_mode))
23847 op1 = force_reg (half_mode, op1);
23849 emit_insn (gen_rtx_SET (VOIDmode, target,
23850 gen_rtx_VEC_CONCAT (mode, op0, op1)));
23854 int i, j, n_elts, n_words, n_elt_per_word;
23855 enum machine_mode inner_mode;
23856 rtx words[4], shift;
23858 inner_mode = GET_MODE_INNER (mode);
23859 n_elts = GET_MODE_NUNITS (mode);
23860 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
23861 n_elt_per_word = n_elts / n_words;
23862 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
23864 for (i = 0; i < n_words; ++i)
23866 rtx word = NULL_RTX;
23868 for (j = 0; j < n_elt_per_word; ++j)
23870 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
23871 elt = convert_modes (word_mode, inner_mode, elt, true);
23877 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
23878 word, 1, OPTAB_LIB_WIDEN);
23879 word = expand_simple_binop (word_mode, IOR, word, elt,
23880 word, 1, OPTAB_LIB_WIDEN);
23888 emit_move_insn (target, gen_lowpart (mode, words[0]));
23889 else if (n_words == 2)
23891 rtx tmp = gen_reg_rtx (mode);
23892 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
23893 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
23894 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
23895 emit_move_insn (target, tmp);
23897 else if (n_words == 4)
23899 rtx tmp = gen_reg_rtx (V4SImode);
23900 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
23901 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
23902 emit_move_insn (target, gen_lowpart (mode, tmp));
23905 gcc_unreachable ();
23909 /* Initialize vector TARGET via VALS. Suppress the use of MMX
23910 instructions unless MMX_OK is true. */
23913 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
23915 enum machine_mode mode = GET_MODE (target);
23916 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23917 int n_elts = GET_MODE_NUNITS (mode);
23918 int n_var = 0, one_var = -1;
23919 bool all_same = true, all_const_zero = true;
23923 for (i = 0; i < n_elts; ++i)
23925 x = XVECEXP (vals, 0, i);
23926 if (!(CONST_INT_P (x)
23927 || GET_CODE (x) == CONST_DOUBLE
23928 || GET_CODE (x) == CONST_FIXED))
23929 n_var++, one_var = i;
23930 else if (x != CONST0_RTX (inner_mode))
23931 all_const_zero = false;
23932 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
23936 /* Constants are best loaded from the constant pool. */
23939 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
23943 /* If all values are identical, broadcast the value. */
23945 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
23946 XVECEXP (vals, 0, 0)))
23949 /* Values where only one field is non-constant are best loaded from
23950 the pool and overwritten via move later. */
23954 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
23955 XVECEXP (vals, 0, one_var),
23959 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
23963 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
23967 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
23969 enum machine_mode mode = GET_MODE (target);
23970 enum machine_mode inner_mode = GET_MODE_INNER (mode);
23971 bool use_vec_merge = false;
23980 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
23981 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
23983 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
23985 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
23986 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
23992 use_vec_merge = TARGET_SSE4_1;
24000 /* For the two element vectors, we implement a VEC_CONCAT with
24001 the extraction of the other element. */
24003 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24004 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24007 op0 = val, op1 = tmp;
24009 op0 = tmp, op1 = val;
24011 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24012 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24017 use_vec_merge = TARGET_SSE4_1;
24024 use_vec_merge = true;
24028 /* tmp = target = A B C D */
24029 tmp = copy_to_reg (target);
24030 /* target = A A B B */
24031 emit_insn (gen_sse_unpcklps (target, target, target));
24032 /* target = X A B B */
24033 ix86_expand_vector_set (false, target, val, 0);
24034 /* target = A X C D */
24035 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24036 GEN_INT (1), GEN_INT (0),
24037 GEN_INT (2+4), GEN_INT (3+4)));
24041 /* tmp = target = A B C D */
24042 tmp = copy_to_reg (target);
24043 /* tmp = X B C D */
24044 ix86_expand_vector_set (false, tmp, val, 0);
24045 /* target = A B X D */
24046 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24047 GEN_INT (0), GEN_INT (1),
24048 GEN_INT (0+4), GEN_INT (3+4)));
24052 /* tmp = target = A B C D */
24053 tmp = copy_to_reg (target);
24054 /* tmp = X B C D */
24055 ix86_expand_vector_set (false, tmp, val, 0);
24056 /* target = A B X D */
24057 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24058 GEN_INT (0), GEN_INT (1),
24059 GEN_INT (2+4), GEN_INT (0+4)));
24063 gcc_unreachable ();
24068 use_vec_merge = TARGET_SSE4_1;
24072 /* Element 0 handled by vec_merge below. */
24075 use_vec_merge = true;
24081 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24082 store into element 0, then shuffle them back. */
24086 order[0] = GEN_INT (elt);
24087 order[1] = const1_rtx;
24088 order[2] = const2_rtx;
24089 order[3] = GEN_INT (3);
24090 order[elt] = const0_rtx;
24092 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24093 order[1], order[2], order[3]));
24095 ix86_expand_vector_set (false, target, val, 0);
24097 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24098 order[1], order[2], order[3]));
24102 /* For SSE1, we have to reuse the V4SF code. */
24103 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24104 gen_lowpart (SFmode, val), elt);
24109 use_vec_merge = TARGET_SSE2;
24112 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24116 use_vec_merge = TARGET_SSE4_1;
24126 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24127 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24128 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24132 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24134 emit_move_insn (mem, target);
24136 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24137 emit_move_insn (tmp, val);
24139 emit_move_insn (target, mem);
24144 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24146 enum machine_mode mode = GET_MODE (vec);
24147 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24148 bool use_vec_extr = false;
24161 use_vec_extr = true;
24165 use_vec_extr = TARGET_SSE4_1;
24177 tmp = gen_reg_rtx (mode);
24178 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24179 GEN_INT (elt), GEN_INT (elt),
24180 GEN_INT (elt+4), GEN_INT (elt+4)));
24184 tmp = gen_reg_rtx (mode);
24185 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24189 gcc_unreachable ();
24192 use_vec_extr = true;
24197 use_vec_extr = TARGET_SSE4_1;
24211 tmp = gen_reg_rtx (mode);
24212 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24213 GEN_INT (elt), GEN_INT (elt),
24214 GEN_INT (elt), GEN_INT (elt)));
24218 tmp = gen_reg_rtx (mode);
24219 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24223 gcc_unreachable ();
24226 use_vec_extr = true;
24231 /* For SSE1, we have to reuse the V4SF code. */
24232 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24233 gen_lowpart (V4SFmode, vec), elt);
24239 use_vec_extr = TARGET_SSE2;
24242 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24246 use_vec_extr = TARGET_SSE4_1;
24250 /* ??? Could extract the appropriate HImode element and shift. */
24257 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24258 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24260 /* Let the rtl optimizers know about the zero extension performed. */
24261 if (inner_mode == QImode || inner_mode == HImode)
24263 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24264 target = gen_lowpart (SImode, target);
24267 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24271 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24273 emit_move_insn (mem, vec);
24275 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24276 emit_move_insn (target, tmp);
24280 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24281 pattern to reduce; DEST is the destination; IN is the input vector. */
24284 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24286 rtx tmp1, tmp2, tmp3;
24288 tmp1 = gen_reg_rtx (V4SFmode);
24289 tmp2 = gen_reg_rtx (V4SFmode);
24290 tmp3 = gen_reg_rtx (V4SFmode);
24292 emit_insn (gen_sse_movhlps (tmp1, in, in));
24293 emit_insn (fn (tmp2, tmp1, in));
24295 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24296 GEN_INT (1), GEN_INT (1),
24297 GEN_INT (1+4), GEN_INT (1+4)));
24298 emit_insn (fn (dest, tmp2, tmp3));
24301 /* Target hook for scalar_mode_supported_p. */
24303 ix86_scalar_mode_supported_p (enum machine_mode mode)
24305 if (DECIMAL_FLOAT_MODE_P (mode))
24307 else if (mode == TFmode)
24308 return TARGET_64BIT;
24310 return default_scalar_mode_supported_p (mode);
24313 /* Implements target hook vector_mode_supported_p. */
24315 ix86_vector_mode_supported_p (enum machine_mode mode)
24317 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24319 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24321 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24323 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24328 /* Target hook for c_mode_for_suffix. */
24329 static enum machine_mode
24330 ix86_c_mode_for_suffix (char suffix)
24332 if (TARGET_64BIT && suffix == 'q')
24334 if (TARGET_MMX && suffix == 'w')
24340 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24342 We do this in the new i386 backend to maintain source compatibility
24343 with the old cc0-based compiler. */
24346 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24347 tree inputs ATTRIBUTE_UNUSED,
24350 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24352 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24357 /* Implements target vector targetm.asm.encode_section_info. This
24358 is not used by netware. */
24360 static void ATTRIBUTE_UNUSED
24361 ix86_encode_section_info (tree decl, rtx rtl, int first)
24363 default_encode_section_info (decl, rtl, first);
24365 if (TREE_CODE (decl) == VAR_DECL
24366 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24367 && ix86_in_large_data_p (decl))
24368 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24371 /* Worker function for REVERSE_CONDITION. */
24374 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24376 return (mode != CCFPmode && mode != CCFPUmode
24377 ? reverse_condition (code)
24378 : reverse_condition_maybe_unordered (code));
24381 /* Output code to perform an x87 FP register move, from OPERANDS[1]
24385 output_387_reg_move (rtx insn, rtx *operands)
24387 if (REG_P (operands[0]))
24389 if (REG_P (operands[1])
24390 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24392 if (REGNO (operands[0]) == FIRST_STACK_REG)
24393 return output_387_ffreep (operands, 0);
24394 return "fstp\t%y0";
24396 if (STACK_TOP_P (operands[0]))
24397 return "fld%z1\t%y1";
24400 else if (MEM_P (operands[0]))
24402 gcc_assert (REG_P (operands[1]));
24403 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24404 return "fstp%z0\t%y0";
24407 /* There is no non-popping store to memory for XFmode.
24408 So if we need one, follow the store with a load. */
24409 if (GET_MODE (operands[0]) == XFmode)
24410 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24412 return "fst%z0\t%y0";
24419 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24420 FP status register is set. */
24423 ix86_emit_fp_unordered_jump (rtx label)
24425 rtx reg = gen_reg_rtx (HImode);
24428 emit_insn (gen_x86_fnstsw_1 (reg));
24430 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24432 emit_insn (gen_x86_sahf_1 (reg));
24434 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24435 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
24439 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24441 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24442 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
24445 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24446 gen_rtx_LABEL_REF (VOIDmode, label),
24448 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24450 emit_jump_insn (temp);
24451 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24454 /* Output code to perform a log1p XFmode calculation. */
24456 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24458 rtx label1 = gen_label_rtx ();
24459 rtx label2 = gen_label_rtx ();
24461 rtx tmp = gen_reg_rtx (XFmode);
24462 rtx tmp2 = gen_reg_rtx (XFmode);
24464 emit_insn (gen_absxf2 (tmp, op1));
24465 emit_insn (gen_cmpxf (tmp,
24466 CONST_DOUBLE_FROM_REAL_VALUE (
24467 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24469 emit_jump_insn (gen_bge (label1));
24471 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24472 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24473 emit_jump (label2);
24475 emit_label (label1);
24476 emit_move_insn (tmp, CONST1_RTX (XFmode));
24477 emit_insn (gen_addxf3 (tmp, op1, tmp));
24478 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24479 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24481 emit_label (label2);
24484 /* Output code to perform a Newton-Rhapson approximation of a single precision
24485 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
24487 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24489 rtx x0, x1, e0, e1, two;
24491 x0 = gen_reg_rtx (mode);
24492 e0 = gen_reg_rtx (mode);
24493 e1 = gen_reg_rtx (mode);
24494 x1 = gen_reg_rtx (mode);
24496 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24498 if (VECTOR_MODE_P (mode))
24499 two = ix86_build_const_vector (SFmode, true, two);
24501 two = force_reg (mode, two);
24503 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24505 /* x0 = rcp(b) estimate */
24506 emit_insn (gen_rtx_SET (VOIDmode, x0,
24507 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
24510 emit_insn (gen_rtx_SET (VOIDmode, e0,
24511 gen_rtx_MULT (mode, x0, b)));
24513 emit_insn (gen_rtx_SET (VOIDmode, e1,
24514 gen_rtx_MINUS (mode, two, e0)));
24516 emit_insn (gen_rtx_SET (VOIDmode, x1,
24517 gen_rtx_MULT (mode, x0, e1)));
24519 emit_insn (gen_rtx_SET (VOIDmode, res,
24520 gen_rtx_MULT (mode, a, x1)));
24523 /* Output code to perform a Newton-Rhapson approximation of a
24524 single precision floating point [reciprocal] square root. */
24526 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24529 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24532 x0 = gen_reg_rtx (mode);
24533 e0 = gen_reg_rtx (mode);
24534 e1 = gen_reg_rtx (mode);
24535 e2 = gen_reg_rtx (mode);
24536 e3 = gen_reg_rtx (mode);
24538 real_from_integer (&r, VOIDmode, -3, -1, 0);
24539 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24541 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24542 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24544 if (VECTOR_MODE_P (mode))
24546 mthree = ix86_build_const_vector (SFmode, true, mthree);
24547 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24550 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24551 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24553 /* x0 = rsqrt(a) estimate */
24554 emit_insn (gen_rtx_SET (VOIDmode, x0,
24555 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24558 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
24563 zero = gen_reg_rtx (mode);
24564 mask = gen_reg_rtx (mode);
24566 zero = force_reg (mode, CONST0_RTX(mode));
24567 emit_insn (gen_rtx_SET (VOIDmode, mask,
24568 gen_rtx_NE (mode, zero, a)));
24570 emit_insn (gen_rtx_SET (VOIDmode, x0,
24571 gen_rtx_AND (mode, x0, mask)));
24575 emit_insn (gen_rtx_SET (VOIDmode, e0,
24576 gen_rtx_MULT (mode, x0, a)));
24578 emit_insn (gen_rtx_SET (VOIDmode, e1,
24579 gen_rtx_MULT (mode, e0, x0)));
24582 mthree = force_reg (mode, mthree);
24583 emit_insn (gen_rtx_SET (VOIDmode, e2,
24584 gen_rtx_PLUS (mode, e1, mthree)));
24586 mhalf = force_reg (mode, mhalf);
24588 /* e3 = -.5 * x0 */
24589 emit_insn (gen_rtx_SET (VOIDmode, e3,
24590 gen_rtx_MULT (mode, x0, mhalf)));
24592 /* e3 = -.5 * e0 */
24593 emit_insn (gen_rtx_SET (VOIDmode, e3,
24594 gen_rtx_MULT (mode, e0, mhalf)));
24595 /* ret = e2 * e3 */
24596 emit_insn (gen_rtx_SET (VOIDmode, res,
24597 gen_rtx_MULT (mode, e2, e3)));
24600 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
24602 static void ATTRIBUTE_UNUSED
24603 i386_solaris_elf_named_section (const char *name, unsigned int flags,
24606 /* With Binutils 2.15, the "@unwind" marker must be specified on
24607 every occurrence of the ".eh_frame" section, not just the first
24610 && strcmp (name, ".eh_frame") == 0)
24612 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
24613 flags & SECTION_WRITE ? "aw" : "a");
24616 default_elf_asm_named_section (name, flags, decl);
24619 /* Return the mangling of TYPE if it is an extended fundamental type. */
24621 static const char *
24622 ix86_mangle_type (const_tree type)
24624 type = TYPE_MAIN_VARIANT (type);
24626 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
24627 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
24630 switch (TYPE_MODE (type))
24633 /* __float128 is "g". */
24636 /* "long double" or __float80 is "e". */
24643 /* For 32-bit code we can save PIC register setup by using
24644 __stack_chk_fail_local hidden function instead of calling
24645 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
24646 register, so it is better to call __stack_chk_fail directly. */
24649 ix86_stack_protect_fail (void)
24651 return TARGET_64BIT
24652 ? default_external_stack_protect_fail ()
24653 : default_hidden_stack_protect_fail ();
24656 /* Select a format to encode pointers in exception handling data. CODE
24657 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
24658 true if the symbol may be affected by dynamic relocations.
24660 ??? All x86 object file formats are capable of representing this.
24661 After all, the relocation needed is the same as for the call insn.
24662 Whether or not a particular assembler allows us to enter such, I
24663 guess we'll have to see. */
24665 asm_preferred_eh_data_format (int code, int global)
24669 int type = DW_EH_PE_sdata8;
24671 || ix86_cmodel == CM_SMALL_PIC
24672 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
24673 type = DW_EH_PE_sdata4;
24674 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
24676 if (ix86_cmodel == CM_SMALL
24677 || (ix86_cmodel == CM_MEDIUM && code))
24678 return DW_EH_PE_udata4;
24679 return DW_EH_PE_absptr;
24682 /* Expand copysign from SIGN to the positive value ABS_VALUE
24683 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
24686 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
24688 enum machine_mode mode = GET_MODE (sign);
24689 rtx sgn = gen_reg_rtx (mode);
24690 if (mask == NULL_RTX)
24692 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
24693 if (!VECTOR_MODE_P (mode))
24695 /* We need to generate a scalar mode mask in this case. */
24696 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24697 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24698 mask = gen_reg_rtx (mode);
24699 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24703 mask = gen_rtx_NOT (mode, mask);
24704 emit_insn (gen_rtx_SET (VOIDmode, sgn,
24705 gen_rtx_AND (mode, mask, sign)));
24706 emit_insn (gen_rtx_SET (VOIDmode, result,
24707 gen_rtx_IOR (mode, abs_value, sgn)));
24710 /* Expand fabs (OP0) and return a new rtx that holds the result. The
24711 mask for masking out the sign-bit is stored in *SMASK, if that is
24714 ix86_expand_sse_fabs (rtx op0, rtx *smask)
24716 enum machine_mode mode = GET_MODE (op0);
24719 xa = gen_reg_rtx (mode);
24720 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
24721 if (!VECTOR_MODE_P (mode))
24723 /* We need to generate a scalar mode mask in this case. */
24724 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
24725 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
24726 mask = gen_reg_rtx (mode);
24727 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
24729 emit_insn (gen_rtx_SET (VOIDmode, xa,
24730 gen_rtx_AND (mode, op0, mask)));
24738 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
24739 swapping the operands if SWAP_OPERANDS is true. The expanded
24740 code is a forward jump to a newly created label in case the
24741 comparison is true. The generated label rtx is returned. */
24743 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
24744 bool swap_operands)
24755 label = gen_label_rtx ();
24756 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
24757 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24758 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
24759 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
24760 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24761 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
24762 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24763 JUMP_LABEL (tmp) = label;
24768 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
24769 using comparison code CODE. Operands are swapped for the comparison if
24770 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
24772 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
24773 bool swap_operands)
24775 enum machine_mode mode = GET_MODE (op0);
24776 rtx mask = gen_reg_rtx (mode);
24785 if (mode == DFmode)
24786 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
24787 gen_rtx_fmt_ee (code, mode, op0, op1)));
24789 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
24790 gen_rtx_fmt_ee (code, mode, op0, op1)));
24795 /* Generate and return a rtx of mode MODE for 2**n where n is the number
24796 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
24798 ix86_gen_TWO52 (enum machine_mode mode)
24800 REAL_VALUE_TYPE TWO52r;
24803 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
24804 TWO52 = const_double_from_real_value (TWO52r, mode);
24805 TWO52 = force_reg (mode, TWO52);
24810 /* Expand SSE sequence for computing lround from OP1 storing
24813 ix86_expand_lround (rtx op0, rtx op1)
24815 /* C code for the stuff we're doing below:
24816 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
24819 enum machine_mode mode = GET_MODE (op1);
24820 const struct real_format *fmt;
24821 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
24824 /* load nextafter (0.5, 0.0) */
24825 fmt = REAL_MODE_FORMAT (mode);
24826 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
24827 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
24829 /* adj = copysign (0.5, op1) */
24830 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
24831 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
24833 /* adj = op1 + adj */
24834 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
24836 /* op0 = (imode)adj */
24837 expand_fix (op0, adj, 0);
24840 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
24843 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
24845 /* C code for the stuff we're doing below (for do_floor):
24847 xi -= (double)xi > op1 ? 1 : 0;
24850 enum machine_mode fmode = GET_MODE (op1);
24851 enum machine_mode imode = GET_MODE (op0);
24852 rtx ireg, freg, label, tmp;
24854 /* reg = (long)op1 */
24855 ireg = gen_reg_rtx (imode);
24856 expand_fix (ireg, op1, 0);
24858 /* freg = (double)reg */
24859 freg = gen_reg_rtx (fmode);
24860 expand_float (freg, ireg, 0);
24862 /* ireg = (freg > op1) ? ireg - 1 : ireg */
24863 label = ix86_expand_sse_compare_and_jump (UNLE,
24864 freg, op1, !do_floor);
24865 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
24866 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
24867 emit_move_insn (ireg, tmp);
24869 emit_label (label);
24870 LABEL_NUSES (label) = 1;
24872 emit_move_insn (op0, ireg);
24875 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
24876 result in OPERAND0. */
24878 ix86_expand_rint (rtx operand0, rtx operand1)
24880 /* C code for the stuff we're doing below:
24881 xa = fabs (operand1);
24882 if (!isless (xa, 2**52))
24884 xa = xa + 2**52 - 2**52;
24885 return copysign (xa, operand1);
24887 enum machine_mode mode = GET_MODE (operand0);
24888 rtx res, xa, label, TWO52, mask;
24890 res = gen_reg_rtx (mode);
24891 emit_move_insn (res, operand1);
24893 /* xa = abs (operand1) */
24894 xa = ix86_expand_sse_fabs (res, &mask);
24896 /* if (!isless (xa, TWO52)) goto label; */
24897 TWO52 = ix86_gen_TWO52 (mode);
24898 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24900 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24901 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24903 ix86_sse_copysign_to_positive (res, xa, res, mask);
24905 emit_label (label);
24906 LABEL_NUSES (label) = 1;
24908 emit_move_insn (operand0, res);
24911 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24914 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
24916 /* C code for the stuff we expand below.
24917 double xa = fabs (x), x2;
24918 if (!isless (xa, TWO52))
24920 xa = xa + TWO52 - TWO52;
24921 x2 = copysign (xa, x);
24930 enum machine_mode mode = GET_MODE (operand0);
24931 rtx xa, TWO52, tmp, label, one, res, mask;
24933 TWO52 = ix86_gen_TWO52 (mode);
24935 /* Temporary for holding the result, initialized to the input
24936 operand to ease control flow. */
24937 res = gen_reg_rtx (mode);
24938 emit_move_insn (res, operand1);
24940 /* xa = abs (operand1) */
24941 xa = ix86_expand_sse_fabs (res, &mask);
24943 /* if (!isless (xa, TWO52)) goto label; */
24944 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
24946 /* xa = xa + TWO52 - TWO52; */
24947 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
24948 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
24950 /* xa = copysign (xa, operand1) */
24951 ix86_sse_copysign_to_positive (xa, xa, res, mask);
24953 /* generate 1.0 or -1.0 */
24954 one = force_reg (mode,
24955 const_double_from_real_value (do_floor
24956 ? dconst1 : dconstm1, mode));
24958 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
24959 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
24960 emit_insn (gen_rtx_SET (VOIDmode, tmp,
24961 gen_rtx_AND (mode, one, tmp)));
24962 /* We always need to subtract here to preserve signed zero. */
24963 tmp = expand_simple_binop (mode, MINUS,
24964 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
24965 emit_move_insn (res, tmp);
24967 emit_label (label);
24968 LABEL_NUSES (label) = 1;
24970 emit_move_insn (operand0, res);
24973 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
24976 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
24978 /* C code for the stuff we expand below.
24979 double xa = fabs (x), x2;
24980 if (!isless (xa, TWO52))
24982 x2 = (double)(long)x;
24989 if (HONOR_SIGNED_ZEROS (mode))
24990 return copysign (x2, x);
24993 enum machine_mode mode = GET_MODE (operand0);
24994 rtx xa, xi, TWO52, tmp, label, one, res, mask;
24996 TWO52 = ix86_gen_TWO52 (mode);
24998 /* Temporary for holding the result, initialized to the input
24999 operand to ease control flow. */
25000 res = gen_reg_rtx (mode);
25001 emit_move_insn (res, operand1);
25003 /* xa = abs (operand1) */
25004 xa = ix86_expand_sse_fabs (res, &mask);
25006 /* if (!isless (xa, TWO52)) goto label; */
25007 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25009 /* xa = (double)(long)x */
25010 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25011 expand_fix (xi, res, 0);
25012 expand_float (xa, xi, 0);
25015 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25017 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25018 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25019 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25020 gen_rtx_AND (mode, one, tmp)));
25021 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25022 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25023 emit_move_insn (res, tmp);
25025 if (HONOR_SIGNED_ZEROS (mode))
25026 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25028 emit_label (label);
25029 LABEL_NUSES (label) = 1;
25031 emit_move_insn (operand0, res);
25034 /* Expand SSE sequence for computing round from OPERAND1 storing
25035 into OPERAND0. Sequence that works without relying on DImode truncation
25036 via cvttsd2siq that is only available on 64bit targets. */
25038 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25040 /* C code for the stuff we expand below.
25041 double xa = fabs (x), xa2, x2;
25042 if (!isless (xa, TWO52))
25044 Using the absolute value and copying back sign makes
25045 -0.0 -> -0.0 correct.
25046 xa2 = xa + TWO52 - TWO52;
25051 else if (dxa > 0.5)
25053 x2 = copysign (xa2, x);
25056 enum machine_mode mode = GET_MODE (operand0);
25057 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25059 TWO52 = ix86_gen_TWO52 (mode);
25061 /* Temporary for holding the result, initialized to the input
25062 operand to ease control flow. */
25063 res = gen_reg_rtx (mode);
25064 emit_move_insn (res, operand1);
25066 /* xa = abs (operand1) */
25067 xa = ix86_expand_sse_fabs (res, &mask);
25069 /* if (!isless (xa, TWO52)) goto label; */
25070 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25072 /* xa2 = xa + TWO52 - TWO52; */
25073 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25074 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25076 /* dxa = xa2 - xa; */
25077 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25079 /* generate 0.5, 1.0 and -0.5 */
25080 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25081 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25082 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25086 tmp = gen_reg_rtx (mode);
25087 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25088 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25089 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25090 gen_rtx_AND (mode, one, tmp)));
25091 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25092 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25093 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25094 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25095 gen_rtx_AND (mode, one, tmp)));
25096 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25098 /* res = copysign (xa2, operand1) */
25099 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25101 emit_label (label);
25102 LABEL_NUSES (label) = 1;
25104 emit_move_insn (operand0, res);
25107 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25110 ix86_expand_trunc (rtx operand0, rtx operand1)
25112 /* C code for SSE variant we expand below.
25113 double xa = fabs (x), x2;
25114 if (!isless (xa, TWO52))
25116 x2 = (double)(long)x;
25117 if (HONOR_SIGNED_ZEROS (mode))
25118 return copysign (x2, x);
25121 enum machine_mode mode = GET_MODE (operand0);
25122 rtx xa, xi, TWO52, label, res, mask;
25124 TWO52 = ix86_gen_TWO52 (mode);
25126 /* Temporary for holding the result, initialized to the input
25127 operand to ease control flow. */
25128 res = gen_reg_rtx (mode);
25129 emit_move_insn (res, operand1);
25131 /* xa = abs (operand1) */
25132 xa = ix86_expand_sse_fabs (res, &mask);
25134 /* if (!isless (xa, TWO52)) goto label; */
25135 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25137 /* x = (double)(long)x */
25138 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25139 expand_fix (xi, res, 0);
25140 expand_float (res, xi, 0);
25142 if (HONOR_SIGNED_ZEROS (mode))
25143 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25145 emit_label (label);
25146 LABEL_NUSES (label) = 1;
25148 emit_move_insn (operand0, res);
25151 /* Expand SSE sequence for computing trunc from OPERAND1 storing
25154 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25156 enum machine_mode mode = GET_MODE (operand0);
25157 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25159 /* C code for SSE variant we expand below.
25160 double xa = fabs (x), x2;
25161 if (!isless (xa, TWO52))
25163 xa2 = xa + TWO52 - TWO52;
25167 x2 = copysign (xa2, x);
25171 TWO52 = ix86_gen_TWO52 (mode);
25173 /* Temporary for holding the result, initialized to the input
25174 operand to ease control flow. */
25175 res = gen_reg_rtx (mode);
25176 emit_move_insn (res, operand1);
25178 /* xa = abs (operand1) */
25179 xa = ix86_expand_sse_fabs (res, &smask);
25181 /* if (!isless (xa, TWO52)) goto label; */
25182 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25184 /* res = xa + TWO52 - TWO52; */
25185 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25186 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25187 emit_move_insn (res, tmp);
25190 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25192 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25193 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25194 emit_insn (gen_rtx_SET (VOIDmode, mask,
25195 gen_rtx_AND (mode, mask, one)));
25196 tmp = expand_simple_binop (mode, MINUS,
25197 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25198 emit_move_insn (res, tmp);
25200 /* res = copysign (res, operand1) */
25201 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25203 emit_label (label);
25204 LABEL_NUSES (label) = 1;
25206 emit_move_insn (operand0, res);
25209 /* Expand SSE sequence for computing round from OPERAND1 storing
25212 ix86_expand_round (rtx operand0, rtx operand1)
25214 /* C code for the stuff we're doing below:
25215 double xa = fabs (x);
25216 if (!isless (xa, TWO52))
25218 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25219 return copysign (xa, x);
25221 enum machine_mode mode = GET_MODE (operand0);
25222 rtx res, TWO52, xa, label, xi, half, mask;
25223 const struct real_format *fmt;
25224 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25226 /* Temporary for holding the result, initialized to the input
25227 operand to ease control flow. */
25228 res = gen_reg_rtx (mode);
25229 emit_move_insn (res, operand1);
25231 TWO52 = ix86_gen_TWO52 (mode);
25232 xa = ix86_expand_sse_fabs (res, &mask);
25233 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25235 /* load nextafter (0.5, 0.0) */
25236 fmt = REAL_MODE_FORMAT (mode);
25237 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25238 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25240 /* xa = xa + 0.5 */
25241 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25242 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25244 /* xa = (double)(int64_t)xa */
25245 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25246 expand_fix (xi, xa, 0);
25247 expand_float (xa, xi, 0);
25249 /* res = copysign (xa, operand1) */
25250 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25252 emit_label (label);
25253 LABEL_NUSES (label) = 1;
25255 emit_move_insn (operand0, res);
25259 /* Validate whether a SSE5 instruction is valid or not.
25260 OPERANDS is the array of operands.
25261 NUM is the number of operands.
25262 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25263 NUM_MEMORY is the maximum number of memory operands to accept. */
25265 ix86_sse5_valid_op_p (rtx operands[], rtx insn, int num, bool uses_oc0, int num_memory)
25271 /* Count the number of memory arguments */
25274 for (i = 0; i < num; i++)
25276 enum machine_mode mode = GET_MODE (operands[i]);
25277 if (register_operand (operands[i], mode))
25280 else if (memory_operand (operands[i], mode))
25282 mem_mask |= (1 << i);
25288 rtx pattern = PATTERN (insn);
25290 /* allow 0 for pcmov */
25291 if (GET_CODE (pattern) != SET
25292 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25294 || operands[i] != CONST0_RTX (mode))
25299 /* If there were no memory operations, allow the insn */
25303 /* Do not allow the destination register to be a memory operand. */
25304 else if (mem_mask & (1 << 0))
25307 /* If there are too many memory operations, disallow the instruction. While
25308 the hardware only allows 1 memory reference, before register allocation
25309 for some insns, we allow two memory operations sometimes in order to allow
25310 code like the following to be optimized:
25312 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25314 or similar cases that are vectorized into using the fmaddss
25316 else if (mem_count > num_memory)
25319 /* Don't allow more than one memory operation if not optimizing. */
25320 else if (mem_count > 1 && !optimize)
25323 else if (num == 4 && mem_count == 1)
25325 /* formats (destination is the first argument), example fmaddss:
25326 xmm1, xmm1, xmm2, xmm3/mem
25327 xmm1, xmm1, xmm2/mem, xmm3
25328 xmm1, xmm2, xmm3/mem, xmm1
25329 xmm1, xmm2/mem, xmm3, xmm1 */
25331 return ((mem_mask == (1 << 1))
25332 || (mem_mask == (1 << 2))
25333 || (mem_mask == (1 << 3)));
25335 /* format, example pmacsdd:
25336 xmm1, xmm2, xmm3/mem, xmm1 */
25338 return (mem_mask == (1 << 2));
25341 else if (num == 4 && num_memory == 2)
25343 /* If there are two memory operations, we can load one of the memory ops
25344 into the destination register. This is for optimizing the
25345 multiply/add ops, which the combiner has optimized both the multiply
25346 and the add insns to have a memory operation. We have to be careful
25347 that the destination doesn't overlap with the inputs. */
25348 rtx op0 = operands[0];
25350 if (reg_mentioned_p (op0, operands[1])
25351 || reg_mentioned_p (op0, operands[2])
25352 || reg_mentioned_p (op0, operands[3]))
25355 /* formats (destination is the first argument), example fmaddss:
25356 xmm1, xmm1, xmm2, xmm3/mem
25357 xmm1, xmm1, xmm2/mem, xmm3
25358 xmm1, xmm2, xmm3/mem, xmm1
25359 xmm1, xmm2/mem, xmm3, xmm1
25361 For the oc0 case, we will load either operands[1] or operands[3] into
25362 operands[0], so any combination of 2 memory operands is ok. */
25366 /* format, example pmacsdd:
25367 xmm1, xmm2, xmm3/mem, xmm1
25369 For the integer multiply/add instructions be more restrictive and
25370 require operands[2] and operands[3] to be the memory operands. */
25372 return (mem_mask == ((1 << 2) | (1 << 3)));
25375 else if (num == 3 && num_memory == 1)
25377 /* formats, example protb:
25378 xmm1, xmm2, xmm3/mem
25379 xmm1, xmm2/mem, xmm3 */
25381 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25383 /* format, example comeq:
25384 xmm1, xmm2, xmm3/mem */
25386 return (mem_mask == (1 << 2));
25390 gcc_unreachable ();
25396 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25397 hardware will allow by using the destination register to load one of the
25398 memory operations. Presently this is used by the multiply/add routines to
25399 allow 2 memory references. */
25402 ix86_expand_sse5_multiple_memory (rtx operands[],
25404 enum machine_mode mode)
25406 rtx op0 = operands[0];
25408 || memory_operand (op0, mode)
25409 || reg_mentioned_p (op0, operands[1])
25410 || reg_mentioned_p (op0, operands[2])
25411 || reg_mentioned_p (op0, operands[3]))
25412 gcc_unreachable ();
25414 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25415 the destination register. */
25416 if (memory_operand (operands[1], mode))
25418 emit_move_insn (op0, operands[1]);
25421 else if (memory_operand (operands[3], mode))
25423 emit_move_insn (op0, operands[3]);
25427 gcc_unreachable ();
25433 /* Table of valid machine attributes. */
25434 static const struct attribute_spec ix86_attribute_table[] =
25436 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25437 /* Stdcall attribute says callee is responsible for popping arguments
25438 if they are not variable. */
25439 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25440 /* Fastcall attribute says callee is responsible for popping arguments
25441 if they are not variable. */
25442 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25443 /* Cdecl attribute says the callee is a normal C declaration */
25444 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25445 /* Regparm attribute specifies how many integer arguments are to be
25446 passed in registers. */
25447 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25448 /* Sseregparm attribute says we are using x86_64 calling conventions
25449 for FP arguments. */
25450 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25451 /* force_align_arg_pointer says this function realigns the stack at entry. */
25452 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25453 false, true, true, ix86_handle_cconv_attribute },
25454 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25455 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25456 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25457 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25459 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25460 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25461 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25462 SUBTARGET_ATTRIBUTE_TABLE,
25464 { NULL, 0, 0, false, false, false, NULL }
25467 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25469 x86_builtin_vectorization_cost (bool runtime_test)
25471 /* If the branch of the runtime test is taken - i.e. - the vectorized
25472 version is skipped - this incurs a misprediction cost (because the
25473 vectorized version is expected to be the fall-through). So we subtract
25474 the latency of a mispredicted branch from the costs that are incured
25475 when the vectorized version is executed.
25477 TODO: The values in individual target tables have to be tuned or new
25478 fields may be needed. For eg. on K8, the default branch path is the
25479 not-taken path. If the taken path is predicted correctly, the minimum
25480 penalty of going down the taken-path is 1 cycle. If the taken-path is
25481 not predicted correctly, then the minimum penalty is 10 cycles. */
25485 return (-(ix86_cost->cond_taken_branch_cost));
25491 /* Initialize the GCC target structure. */
25492 #undef TARGET_ATTRIBUTE_TABLE
25493 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25494 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25495 # undef TARGET_MERGE_DECL_ATTRIBUTES
25496 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25499 #undef TARGET_COMP_TYPE_ATTRIBUTES
25500 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25502 #undef TARGET_INIT_BUILTINS
25503 #define TARGET_INIT_BUILTINS ix86_init_builtins
25504 #undef TARGET_EXPAND_BUILTIN
25505 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25507 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25508 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25509 ix86_builtin_vectorized_function
25511 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25512 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25514 #undef TARGET_BUILTIN_RECIPROCAL
25515 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25517 #undef TARGET_ASM_FUNCTION_EPILOGUE
25518 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25520 #undef TARGET_ENCODE_SECTION_INFO
25521 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25522 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25524 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25527 #undef TARGET_ASM_OPEN_PAREN
25528 #define TARGET_ASM_OPEN_PAREN ""
25529 #undef TARGET_ASM_CLOSE_PAREN
25530 #define TARGET_ASM_CLOSE_PAREN ""
25532 #undef TARGET_ASM_ALIGNED_HI_OP
25533 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25534 #undef TARGET_ASM_ALIGNED_SI_OP
25535 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25537 #undef TARGET_ASM_ALIGNED_DI_OP
25538 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25541 #undef TARGET_ASM_UNALIGNED_HI_OP
25542 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25543 #undef TARGET_ASM_UNALIGNED_SI_OP
25544 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25545 #undef TARGET_ASM_UNALIGNED_DI_OP
25546 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25548 #undef TARGET_SCHED_ADJUST_COST
25549 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25550 #undef TARGET_SCHED_ISSUE_RATE
25551 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25552 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25553 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25554 ia32_multipass_dfa_lookahead
25556 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25557 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
25560 #undef TARGET_HAVE_TLS
25561 #define TARGET_HAVE_TLS true
25563 #undef TARGET_CANNOT_FORCE_CONST_MEM
25564 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
25565 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
25566 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
25568 #undef TARGET_DELEGITIMIZE_ADDRESS
25569 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
25571 #undef TARGET_MS_BITFIELD_LAYOUT_P
25572 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
25575 #undef TARGET_BINDS_LOCAL_P
25576 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
25578 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25579 #undef TARGET_BINDS_LOCAL_P
25580 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
25583 #undef TARGET_ASM_OUTPUT_MI_THUNK
25584 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
25585 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
25586 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
25588 #undef TARGET_ASM_FILE_START
25589 #define TARGET_ASM_FILE_START x86_file_start
25591 #undef TARGET_DEFAULT_TARGET_FLAGS
25592 #define TARGET_DEFAULT_TARGET_FLAGS \
25594 | TARGET_SUBTARGET_DEFAULT \
25595 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
25597 #undef TARGET_HANDLE_OPTION
25598 #define TARGET_HANDLE_OPTION ix86_handle_option
25600 #undef TARGET_RTX_COSTS
25601 #define TARGET_RTX_COSTS ix86_rtx_costs
25602 #undef TARGET_ADDRESS_COST
25603 #define TARGET_ADDRESS_COST ix86_address_cost
25605 #undef TARGET_FIXED_CONDITION_CODE_REGS
25606 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
25607 #undef TARGET_CC_MODES_COMPATIBLE
25608 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
25610 #undef TARGET_MACHINE_DEPENDENT_REORG
25611 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
25613 #undef TARGET_BUILD_BUILTIN_VA_LIST
25614 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
25616 #undef TARGET_EXPAND_BUILTIN_VA_START
25617 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
25619 #undef TARGET_MD_ASM_CLOBBERS
25620 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
25622 #undef TARGET_PROMOTE_PROTOTYPES
25623 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
25624 #undef TARGET_STRUCT_VALUE_RTX
25625 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
25626 #undef TARGET_SETUP_INCOMING_VARARGS
25627 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
25628 #undef TARGET_MUST_PASS_IN_STACK
25629 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
25630 #undef TARGET_PASS_BY_REFERENCE
25631 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
25632 #undef TARGET_INTERNAL_ARG_POINTER
25633 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
25634 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
25635 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
25636 #undef TARGET_STRICT_ARGUMENT_NAMING
25637 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
25639 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
25640 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
25642 #undef TARGET_SCALAR_MODE_SUPPORTED_P
25643 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
25645 #undef TARGET_VECTOR_MODE_SUPPORTED_P
25646 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
25648 #undef TARGET_C_MODE_FOR_SUFFIX
25649 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
25652 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
25653 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
25656 #ifdef SUBTARGET_INSERT_ATTRIBUTES
25657 #undef TARGET_INSERT_ATTRIBUTES
25658 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
25661 #undef TARGET_MANGLE_TYPE
25662 #define TARGET_MANGLE_TYPE ix86_mangle_type
25664 #undef TARGET_STACK_PROTECT_FAIL
25665 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
25667 #undef TARGET_FUNCTION_VALUE
25668 #define TARGET_FUNCTION_VALUE ix86_function_value
25670 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
25671 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
25673 struct gcc_target targetm = TARGET_INITIALIZER;
25675 #include "gt-i386.h"